69b089c84d4aff65c7431962a4542996e90e5016
1 /********************* */
2 /*! \file regexp_operation.cpp
4 ** Original author: Tianyi Liang
5 ** Major contributors: Morgan Deters
6 ** Minor contributors (to current version): none
7 ** This file is part of the CVC4 project.
8 ** Copyright (c) 2009-2014 New York University and The University of Iowa
9 ** See the file COPYING in the top-level source directory for licensing
10 ** information.\endverbatim
12 ** \brief Symbolic Regular Expresion Operations
14 ** Symbolic Regular Expresion Operations
17 #include "theory/strings/regexp_operation.h"
18 #include "expr/kind.h"
24 RegExpOpr::RegExpOpr()
28 d_emptyString
= NodeManager::currentNM()->mkConst( ::CVC4::String("") );
29 d_true
= NodeManager::currentNM()->mkConst( true );
30 d_false
= NodeManager::currentNM()->mkConst( false );
31 d_emptySingleton
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, d_emptyString
);
32 d_zero
= NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
33 d_one
= NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
34 std::vector
< Node
> nvec
;
35 d_emptyRegexp
= NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY
, nvec
);
36 d_sigma
= NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA
, nvec
);
37 d_sigma_star
= NodeManager::currentNM()->mkNode( kind::REGEXP_STAR
, d_sigma
);
40 int RegExpOpr::gcd ( int a
, int b
) {
43 c
= a
; a
= b
%a
; b
= c
;
48 bool RegExpOpr::checkConstRegExp( Node r
) {
49 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r
) << std::endl
;
51 if( d_cstre_cache
.find( r
) != d_cstre_cache
.end() ) {
52 ret
= d_cstre_cache
[r
];
54 if(r
.getKind() == kind::STRING_TO_REGEXP
) {
55 Node tmp
= Rewriter::rewrite( r
[0] );
58 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
59 if(!checkConstRegExp(r
[i
])) {
64 d_cstre_cache
[r
] = ret
;
69 // 0-unknown, 1-yes, 2-no
70 int RegExpOpr::delta( Node r
, Node
&exp
) {
71 Trace("regexp-delta") << "RegExp-Delta starts with " << mkString( r
) << std::endl
;
73 if( d_delta_cache
.find( r
) != d_delta_cache
.end() ) {
74 ret
= d_delta_cache
[r
].first
;
75 exp
= d_delta_cache
[r
].second
;
79 case kind::REGEXP_EMPTY
: {
83 case kind::REGEXP_SIGMA
: {
87 case kind::STRING_TO_REGEXP
: {
88 Node tmp
= Rewriter::rewrite(r
[0]);
90 if(tmp
== d_emptyString
) {
97 if(tmp
.getKind() == kind::STRING_CONCAT
) {
98 for(unsigned i
=0; i
<tmp
.getNumChildren(); i
++) {
99 if(tmp
[i
].isConst()) {
106 exp
= r
[0].eqNode(d_emptyString
);
111 case kind::REGEXP_CONCAT
: {
113 std::vector
< Node
> vec_nodes
;
114 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
116 int tmp
= delta( r
[i
], exp2
);
120 } else if(tmp
== 0) {
121 vec_nodes
.push_back( exp2
);
129 exp
= vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode(kind::AND
, vec_nodes
);
134 case kind::REGEXP_UNION
: {
136 std::vector
< Node
> vec_nodes
;
137 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
139 int tmp
= delta( r
[i
], exp2
);
143 } else if(tmp
== 0) {
144 vec_nodes
.push_back( exp2
);
152 exp
= vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode(kind::OR
, vec_nodes
);
157 case kind::REGEXP_INTER
: {
159 std::vector
< Node
> vec_nodes
;
160 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
162 int tmp
= delta( r
[i
], exp2
);
166 } else if(tmp
== 0) {
167 vec_nodes
.push_back( exp2
);
175 exp
= vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode(kind::AND
, vec_nodes
);
180 case kind::REGEXP_STAR
: {
184 case kind::REGEXP_PLUS
: {
185 ret
= delta( r
[0], exp
);
188 case kind::REGEXP_OPT
: {
192 case kind::REGEXP_RANGE
: {
197 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in delta of RegExp." << std::endl
;
199 //return Node::null();
203 exp
= Rewriter::rewrite(exp
);
205 std::pair
< int, Node
> p(ret
, exp
);
206 d_delta_cache
[r
] = p
;
208 Trace("regexp-delta") << "RegExp-Delta returns : " << ret
<< std::endl
;
212 // 0-unknown, 1-yes, 2-no
213 int RegExpOpr::derivativeS( Node r
, CVC4::String c
, Node
&retNode
) {
214 Assert( c
.size() < 2 );
215 Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r
) << " }, c=" << c
<< std::endl
;
218 retNode
= d_emptyRegexp
;
220 PairNodeStr dv
= std::make_pair( r
, c
);
221 if( d_deriv_cache
.find( dv
) != d_deriv_cache
.end() ) {
222 retNode
= d_deriv_cache
[dv
].first
;
223 ret
= d_deriv_cache
[dv
].second
;
224 } else if( c
.isEmptyString() ) {
226 ret
= delta( r
, expNode
);
228 retNode
= NodeManager::currentNM()->mkNode(kind::ITE
, expNode
, r
, d_emptyRegexp
);
229 } else if(ret
== 1) {
232 std::pair
< Node
, int > p(retNode
, ret
);
233 d_deriv_cache
[dv
] = p
;
235 switch( r
.getKind() ) {
236 case kind::REGEXP_EMPTY
: {
240 case kind::REGEXP_SIGMA
: {
241 retNode
= d_emptySingleton
;
244 case kind::STRING_TO_REGEXP
: {
245 Node tmp
= Rewriter::rewrite(r
[0]);
247 if(tmp
== d_emptyString
) {
250 if(tmp
.getConst
< CVC4::String
>().getFirstChar() == c
.getFirstChar()) {
251 retNode
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
,
252 tmp
.getConst
< CVC4::String
>().size() == 1 ? d_emptyString
: NodeManager::currentNM()->mkConst( tmp
.getConst
< CVC4::String
>().substr(1) ) );
260 if(tmp
.getKind() == kind::STRING_CONCAT
) {
263 if(t2
.getConst
< CVC4::String
>().getFirstChar() == c
.getFirstChar()) {
264 Node n
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
,
265 tmp
.getConst
< CVC4::String
>().size() == 1 ? d_emptyString
: NodeManager::currentNM()->mkConst( tmp
.getConst
< CVC4::String
>().substr(1) ) );
266 std::vector
< Node
> vec_nodes
;
267 vec_nodes
.push_back(n
);
268 for(unsigned i
=1; i
<tmp
.getNumChildren(); i
++) {
269 vec_nodes
.push_back(tmp
[i
]);
271 retNode
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vec_nodes
);
278 std::vector
< Node
> vec_nodes
;
279 for(unsigned i
=1; i
<tmp
.getNumChildren(); i
++) {
280 vec_nodes
.push_back(tmp
[i
]);
282 rest
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vec_nodes
);
286 Node sk
= NodeManager::currentNM()->mkSkolem( "rsp", NodeManager::currentNM()->stringType(), "Split RegExp" );
287 retNode
= NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, sk
);
289 retNode
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, retNode
, rest
));
291 Node exp
= tmp
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT
,
292 NodeManager::currentNM()->mkConst(c
), sk
));
293 retNode
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE
, exp
, retNode
, d_emptyRegexp
));
298 case kind::REGEXP_CONCAT
: {
299 std::vector
< Node
> vec_nodes
;
300 std::vector
< Node
> delta_nodes
;
302 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
305 int rt
= derivativeS(r
[i
], c
, dc
);
310 std::vector
< Node
> vec_nodes2
;
311 if(dc
!= d_emptySingleton
) {
312 vec_nodes2
.push_back( dc
);
314 for(unsigned j
=i
+1; j
<r
.getNumChildren(); ++j
) {
315 if(r
[j
] != d_emptySingleton
) {
316 vec_nodes2
.push_back( r
[j
] );
319 Node tmp
= vec_nodes2
.size()==0 ? d_emptySingleton
:
320 vec_nodes2
.size()==1 ? vec_nodes2
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, vec_nodes2
);
321 if(dnode
!= d_true
) {
322 tmp
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE
, dnode
, tmp
, d_emptyRegexp
));
325 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), tmp
) == vec_nodes
.end()) {
326 vec_nodes
.push_back( tmp
);
330 int rt2
= delta( r
[i
], exp3
);
332 dnode
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND
, dnode
, exp3
));
333 } else if( rt2
== 2 ) {
337 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
338 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
339 if(retNode
== d_emptyRegexp
) {
344 case kind::REGEXP_UNION
: {
345 std::vector
< Node
> vec_nodes
;
346 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
348 int rt
= derivativeS(r
[i
], c
, dc
);
353 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
354 vec_nodes
.push_back( dc
);
357 Trace("regexp-derive") << "RegExp-derive OR R[" << i
<< "]{ " << mkString(r
[i
]) << " returns " << mkString(dc
) << std::endl
;
359 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
360 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
361 if(retNode
== d_emptyRegexp
) {
366 case kind::REGEXP_INTER
: {
368 bool flag_sg
= false;
369 std::vector
< Node
> vec_nodes
;
370 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
372 int rt
= derivativeS(r
[i
], c
, dc
);
379 if(dc
== d_sigma_star
) {
382 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
383 vec_nodes
.push_back( dc
);
388 if(vec_nodes
.size() == 0 && flag_sg
) {
389 retNode
= d_sigma_star
;
391 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
392 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER
, vec_nodes
) );
393 if(retNode
== d_emptyRegexp
) {
398 retNode
= d_emptyRegexp
;
403 case kind::REGEXP_STAR
: {
405 ret
= derivativeS(r
[0], c
, dc
);
406 retNode
= dc
==d_emptyRegexp
? dc
: (dc
==d_emptySingleton
? r
: NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, dc
, r
));
410 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in derivative of RegExp." << std::endl
;
411 Assert( false, "Unsupported Term" );
414 if(retNode
!= d_emptyRegexp
) {
415 retNode
= Rewriter::rewrite( retNode
);
417 std::pair
< Node
, int > p(retNode
, ret
);
418 d_deriv_cache
[dv
] = p
;
421 Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode
) << std::endl
;
425 Node
RegExpOpr::derivativeSingle( Node r
, CVC4::String c
) {
426 Assert( c
.size() < 2 );
427 Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r
) << " }, c=" << c
<< std::endl
;
428 Node retNode
= d_emptyRegexp
;
429 PairNodeStr dv
= std::make_pair( r
, c
);
430 if( d_dv_cache
.find( dv
) != d_dv_cache
.end() ) {
431 retNode
= d_dv_cache
[dv
];
432 } else if( c
.isEmptyString() ){
434 int tmp
= delta( r
, exp
);
437 retNode
= d_emptyRegexp
;
438 } else if(tmp
== 1) {
441 retNode
= d_emptyRegexp
;
446 case kind::REGEXP_EMPTY
: {
447 retNode
= d_emptyRegexp
;
450 case kind::REGEXP_SIGMA
: {
451 retNode
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, d_emptyString
);
454 case kind::STRING_TO_REGEXP
: {
456 if(r
[0] == d_emptyString
) {
457 retNode
= d_emptyRegexp
;
459 if(r
[0].getConst
< CVC4::String
>().getFirstChar() == c
.getFirstChar()) {
460 retNode
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
,
461 r
[0].getConst
< CVC4::String
>().size() == 1 ? d_emptyString
: NodeManager::currentNM()->mkConst( r
[0].getConst
< CVC4::String
>().substr(1) ) );
463 retNode
= d_emptyRegexp
;
468 retNode
= d_emptyRegexp
;
472 case kind::REGEXP_CONCAT
: {
473 Node rees
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, d_emptyString
);
474 std::vector
< Node
> vec_nodes
;
475 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
476 Node dc
= derivativeSingle(r
[i
], c
);
477 if(dc
!= d_emptyRegexp
) {
478 std::vector
< Node
> vec_nodes2
;
480 vec_nodes2
.push_back( dc
);
482 for(unsigned j
=i
+1; j
<r
.getNumChildren(); ++j
) {
484 vec_nodes2
.push_back( r
[j
] );
487 Node tmp
= vec_nodes2
.size()==0 ? rees
:
488 vec_nodes2
.size()==1 ? vec_nodes2
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, vec_nodes2
);
489 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), tmp
) == vec_nodes
.end()) {
490 vec_nodes
.push_back( tmp
);
494 if( delta( r
[i
], exp
) != 1 ) {
498 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
499 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
502 case kind::REGEXP_UNION
: {
503 std::vector
< Node
> vec_nodes
;
504 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
505 Node dc
= derivativeSingle(r
[i
], c
);
506 if(dc
!= d_emptyRegexp
) {
507 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
508 vec_nodes
.push_back( dc
);
511 Trace("regexp-derive") << "RegExp-derive OR R[" << i
<< "]{ " << mkString(r
[i
]) << " returns " << mkString(dc
) << std::endl
;
513 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
514 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
517 case kind::REGEXP_INTER
: {
519 bool flag_sg
= false;
520 std::vector
< Node
> vec_nodes
;
521 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
522 Node dc
= derivativeSingle(r
[i
], c
);
523 if(dc
!= d_emptyRegexp
) {
524 if(dc
== d_sigma_star
) {
527 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
528 vec_nodes
.push_back( dc
);
537 if(vec_nodes
.size() == 0 && flag_sg
) {
538 retNode
= d_sigma_star
;
540 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
541 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER
, vec_nodes
) );
544 retNode
= d_emptyRegexp
;
548 case kind::REGEXP_STAR
: {
549 Node dc
= derivativeSingle(r
[0], c
);
550 if(dc
!= d_emptyRegexp
) {
551 retNode
= dc
==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, d_emptyString
) ? r
: NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, dc
, r
);
553 retNode
= d_emptyRegexp
;
558 //TODO: special sym: sigma, none, all
559 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in derivative of RegExp." << std::endl
;
560 Assert( false, "Unsupported Term" );
561 //return Node::null();
564 if(retNode
!= d_emptyRegexp
) {
565 retNode
= Rewriter::rewrite( retNode
);
567 d_dv_cache
[dv
] = retNode
;
569 Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode
) << std::endl
;
574 bool RegExpOpr::guessLength( Node r
, int &co
) {
577 case kind::STRING_TO_REGEXP
:
580 co
+= r
[0].getConst
< CVC4::String
>().size();
587 case kind::REGEXP_CONCAT
:
589 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
590 if(!guessLength( r
[i
], co
)) {
597 case kind::REGEXP_UNION
:
600 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
602 if(!guessLength( r
[i
], cop
)) {
608 g_co
= gcd(g_co
, cop
);
614 case kind::REGEXP_INTER
:
617 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
619 if(!guessLength( r
[i
], cop
)) {
625 g_co
= gcd(g_co
, cop
);
631 case kind::REGEXP_STAR
:
638 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in membership of RegExp." << std::endl
;
643 void RegExpOpr::firstChars( Node r
, std::set
<unsigned> &pcset
, SetNodes
&pvset
) {
644 std::map
< Node
, std::pair
< std::set
<unsigned>, SetNodes
> >::const_iterator itr
= d_fset_cache
.find(r
);
645 if(itr
!= d_fset_cache
.end()) {
646 pcset
.insert((itr
->second
).first
.begin(), (itr
->second
).first
.end());
647 pvset
.insert((itr
->second
).second
.begin(), (itr
->second
).second
.end());
649 std::set
<unsigned> cset
;
653 case kind::REGEXP_EMPTY
: {
656 case kind::REGEXP_SIGMA
: {
657 for(unsigned i
=0; i
<d_card
; i
++) {
662 case kind::STRING_TO_REGEXP
: {
663 Node st
= Rewriter::rewrite(r
[0]);
665 CVC4::String s
= st
.getConst
< CVC4::String
>();
669 } else if(st
.getKind() == kind::VARIABLE
) {
672 if(st
[0].isConst()) {
673 CVC4::String s
= st
[0].getConst
< CVC4::String
>();
676 vset
.insert( st
[0] );
681 case kind::REGEXP_CONCAT
: {
682 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
683 firstChars(r
[i
], cset
, vset
);
686 int r
= delta( n
, exp
);
693 case kind::REGEXP_UNION
: {
694 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
695 firstChars(r
[i
], cset
, vset
);
699 case kind::REGEXP_INTER
: {
700 //TODO: Overapproximation for now
701 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
702 firstChars(r
[i
], cset
, vset
);
706 case kind::REGEXP_STAR
: {
707 firstChars(r
[0], cset
, vset
);
711 Trace("strings-regexp") << "Unsupported term: " << r
<< " in getCharSet." << std::endl
;
712 Assert( false, "Unsupported Term" );
715 pcset
.insert(cset
.begin(), cset
.end());
716 pvset
.insert(vset
.begin(), vset
.end());
717 std::pair
< std::set
<unsigned>, SetNodes
> p(cset
, vset
);
720 Trace("regexp-fset") << "FSET( " << mkString(r
) << " ) = { ";
721 for(std::set
<unsigned>::const_iterator itr
= cset
.begin();
722 itr
!= cset
.end(); itr
++) {
723 Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr
) << ",";
725 Trace("regexp-fset") << " }" << std::endl
;
729 bool RegExpOpr::follow( Node r
, CVC4::String c
, std::vector
< char > &vec_chars
) {
732 case kind::STRING_TO_REGEXP
:
735 if(r
[0] != d_emptyString
) {
736 char t1
= r
[0].getConst
< CVC4::String
>().getFirstChar();
737 if(c
.isEmptyString()) {
738 vec_chars
.push_back( t1
);
741 char t2
= c
.getFirstChar();
746 vec_chars
.push_back( c
.substr(1,1).getFirstChar() );
748 vec_chars
.push_back( '\0' );
761 case kind::REGEXP_CONCAT
:
763 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
764 if( follow(r
[i
], c
, vec_chars
) ) {
765 if(vec_chars
[vec_chars
.size() - 1] == '\0') {
766 vec_chars
.pop_back();
767 c
= d_emptyString
.getConst
< CVC4::String
>();
773 vec_chars
.push_back( '\0' );
777 case kind::REGEXP_UNION
:
780 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
781 if( follow(r
[i
], c
, vec_chars
) ) {
788 case kind::REGEXP_INTER
:
790 std::vector
< char > vt2
;
791 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
792 std::vector
< char > v_tmp
;
793 if( !follow(r
[i
], c
, v_tmp
) ) {
796 std::vector
< char > vt3(vt2
);
798 std::set_intersection( vt3
.begin(), vt3
.end(), v_tmp
.begin(), v_tmp
.end(), vt2
.begin() );
799 if(vt2
.size() == 0) {
803 vec_chars
.insert( vec_chars
.end(), vt2
.begin(), vt2
.end() );
807 case kind::REGEXP_STAR
:
809 if(follow(r
[0], c
, vec_chars
)) {
810 if(vec_chars
[vec_chars
.size() - 1] == '\0') {
811 if(c
.isEmptyString()) {
814 vec_chars
.pop_back();
815 c
= d_emptyString
.getConst
< CVC4::String
>();
816 return follow(r
[0], c
, vec_chars
);
822 vec_chars
.push_back( '\0' );
828 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in delta of RegExp." << std::endl
;
829 //AlwaysAssert( false );
830 //return Node::null();
836 Node
RegExpOpr::mkAllExceptOne( char exp_c
) {
837 std::vector
< Node
> vec_nodes
;
838 for(char c
=d_char_start
; c
<=d_char_end
; ++c
) {
840 Node n
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst( ::CVC4::String( c
) ) );
841 vec_nodes
.push_back( n
);
844 return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
);
848 void RegExpOpr::simplify(Node t
, std::vector
< Node
> &new_nodes
, bool polarity
) {
849 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t
<< ", polarity=" << polarity
<< std::endl
;
850 Assert(t
.getKind() == kind::STRING_IN_REGEXP
);
851 Node str
= Rewriter::rewrite(t
[0]);
852 Node re
= Rewriter::rewrite(t
[1]);
854 simplifyPRegExp( str
, re
, new_nodes
);
856 simplifyNRegExp( str
, re
, new_nodes
);
858 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes
.size() << "):\n";
859 for(unsigned i
=0; i
<new_nodes
.size(); i
++) {
860 Trace("strings-regexp-simpl") << "\t" << new_nodes
[i
] << std::endl
;
863 void RegExpOpr::simplifyNRegExp( Node s
, Node r
, std::vector
< Node
> &new_nodes
) {
864 std::pair
< Node
, Node
> p(s
, r
);
865 std::map
< std::pair
< Node
, Node
>, Node
>::const_iterator itr
= d_simpl_neg_cache
.find(p
);
866 if(itr
!= d_simpl_neg_cache
.end()) {
867 new_nodes
.push_back( itr
->second
);
872 case kind::REGEXP_EMPTY
: {
876 case kind::REGEXP_SIGMA
: {
877 conc
= d_one
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
)).negate();
880 case kind::STRING_TO_REGEXP
: {
881 conc
= s
.eqNode(r
[0]).negate();
884 case kind::REGEXP_CONCAT
: {
885 //TODO: rewrite empty
886 Node lens
= NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
);
887 Node b1
= NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
888 Node b1v
= NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST
, b1
);
889 Node g1
= NodeManager::currentNM()->mkNode( kind::AND
, NodeManager::currentNM()->mkNode(kind::GEQ
, b1
, d_zero
),
890 NodeManager::currentNM()->mkNode( kind::GEQ
, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
), b1
) );
891 Node s1
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL
, s
, d_zero
, b1
));
892 Node s2
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL
, s
, b1
, NodeManager::currentNM()->mkNode(kind::MINUS
, lens
, b1
)));
893 Node s1r1
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s1
, r
[0]).negate();
894 if(r
[0].getKind() == kind::STRING_TO_REGEXP
) {
895 s1r1
= s1
.eqNode(r
[0][0]).negate();
896 } else if(r
[0].getKind() == kind::REGEXP_EMPTY
) {
900 if(r
.getNumChildren() > 2) {
901 std::vector
< Node
> nvec
;
902 for(unsigned i
=1; i
<r
.getNumChildren(); i
++) {
903 nvec
.push_back( r
[i
] );
905 r2
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, nvec
);
907 r2
= Rewriter::rewrite(r2
);
908 Node s2r2
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s2
, r2
).negate();
909 if(r2
.getKind() == kind::STRING_TO_REGEXP
) {
910 s2r2
= s2
.eqNode(r2
[0]).negate();
911 } else if(r2
.getKind() == kind::REGEXP_EMPTY
) {
915 conc
= NodeManager::currentNM()->mkNode(kind::OR
, s1r1
, s2r2
);
916 conc
= NodeManager::currentNM()->mkNode(kind::IMPLIES
, g1
, conc
);
917 conc
= NodeManager::currentNM()->mkNode(kind::FORALL
, b1v
, conc
);
920 case kind::REGEXP_UNION
: {
921 std::vector
< Node
> c_and
;
922 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
923 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
924 c_and
.push_back( r
[i
][0].eqNode(s
).negate() );
925 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
928 c_and
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]).negate());
931 conc
= c_and
.size() == 0 ? d_true
:
932 c_and
.size() == 1 ? c_and
[0] : NodeManager::currentNM()->mkNode(kind::AND
, c_and
);
935 case kind::REGEXP_INTER
: {
936 bool emptyflag
= false;
937 std::vector
< Node
> c_or
;
938 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
939 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
940 c_or
.push_back( r
[i
][0].eqNode(s
).negate() );
941 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
945 c_or
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]).negate());
951 conc
= c_or
.size() == 1 ? c_or
[0] : NodeManager::currentNM()->mkNode(kind::OR
, c_or
);
955 case kind::REGEXP_STAR
: {
956 if(s
== d_emptyString
) {
958 } else if(r
[0].getKind() == kind::REGEXP_EMPTY
) {
959 conc
= s
.eqNode(d_emptyString
).negate();
960 } else if(r
[0].getKind() == kind::REGEXP_SIGMA
) {
963 Node lens
= NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
);
964 Node sne
= s
.eqNode(d_emptyString
).negate();
965 Node b1
= NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
966 Node b1v
= NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST
, b1
);
967 Node g1
= NodeManager::currentNM()->mkNode( kind::AND
, NodeManager::currentNM()->mkNode(kind::GEQ
, b1
, d_one
),
968 NodeManager::currentNM()->mkNode( kind::GEQ
, lens
, b1
) );
970 Node s1
= NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL
, s
, d_zero
, b1
);
971 Node s2
= NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL
, s
, b1
, NodeManager::currentNM()->mkNode(kind::MINUS
, lens
, b1
));
972 Node s1r1
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s1
, r
[0]).negate();
973 Node s2r2
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s2
, r
).negate();
975 conc
= NodeManager::currentNM()->mkNode(kind::OR
, s1r1
, s2r2
);
976 conc
= NodeManager::currentNM()->mkNode(kind::IMPLIES
, g1
, conc
);
977 conc
= NodeManager::currentNM()->mkNode(kind::FORALL
, b1v
, conc
);
978 conc
= NodeManager::currentNM()->mkNode(kind::AND
, sne
, conc
);
983 Trace("strings-regexp") << "Unsupported term: " << r
<< " in simplifyNRegExp." << std::endl
;
984 Assert( false, "Unsupported Term" );
987 conc
= Rewriter::rewrite( conc
);
988 new_nodes
.push_back( conc
);
989 d_simpl_neg_cache
[p
] = conc
;
992 void RegExpOpr::simplifyPRegExp( Node s
, Node r
, std::vector
< Node
> &new_nodes
) {
993 std::pair
< Node
, Node
> p(s
, r
);
994 std::map
< std::pair
< Node
, Node
>, Node
>::const_iterator itr
= d_simpl_cache
.find(p
);
995 if(itr
!= d_simpl_cache
.end()) {
996 new_nodes
.push_back( itr
->second
);
1001 case kind::REGEXP_EMPTY
: {
1005 case kind::REGEXP_SIGMA
: {
1006 conc
= d_one
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
));
1009 case kind::STRING_TO_REGEXP
: {
1010 conc
= s
.eqNode(r
[0]);
1013 case kind::REGEXP_CONCAT
: {
1014 std::vector
< Node
> nvec
;
1015 std::vector
< Node
> cc
;
1016 bool emptyflag
= false;
1017 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1018 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
1019 cc
.push_back( r
[i
][0] );
1020 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
1024 Node sk
= NodeManager::currentNM()->mkSkolem( "rc", s
.getType(), "created for regular expression concat" );
1025 Node lem
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk
, r
[i
]);
1026 nvec
.push_back(lem
);
1033 Node lem
= s
.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT
, cc
) );
1034 nvec
.push_back(lem
);
1035 conc
= nvec
.size() == 1 ? nvec
[0] : NodeManager::currentNM()->mkNode(kind::AND
, nvec
);
1039 case kind::REGEXP_UNION
: {
1040 std::vector
< Node
> c_or
;
1041 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1042 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
1043 c_or
.push_back( r
[i
][0].eqNode(s
) );
1044 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
1047 c_or
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]));
1050 conc
= c_or
.size() == 0 ? d_false
:
1051 c_or
.size() == 1 ? c_or
[0] : NodeManager::currentNM()->mkNode(kind::OR
, c_or
);
1054 case kind::REGEXP_INTER
: {
1055 std::vector
< Node
> c_and
;
1056 bool emptyflag
= false;
1057 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1058 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
1059 c_and
.push_back( r
[i
][0].eqNode(s
) );
1060 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
1064 c_and
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]));
1070 conc
= c_and
.size() == 1 ? c_and
[0] : NodeManager::currentNM()->mkNode(kind::AND
, c_and
);
1074 case kind::REGEXP_STAR
: {
1075 if(s
== d_emptyString
) {
1077 } else if(r
[0].getKind() == kind::REGEXP_EMPTY
) {
1078 conc
= s
.eqNode(d_emptyString
);
1079 } else if(r
[0].getKind() == kind::REGEXP_SIGMA
) {
1082 Node se
= s
.eqNode(d_emptyString
);
1083 Node sinr
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[0]);
1084 Node sk1
= NodeManager::currentNM()->mkSkolem( "rs", s
.getType(), "created for regular expression star" );
1085 Node sk2
= NodeManager::currentNM()->mkSkolem( "rs", s
.getType(), "created for regular expression star" );
1086 Node s1nz
= sk1
.eqNode(d_emptyString
).negate();
1087 Node s2nz
= sk2
.eqNode(d_emptyString
).negate();
1088 Node s1inr
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk1
, r
[0]);
1089 Node s2inrs
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk2
, r
);
1090 Node s12
= s
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT
, sk1
, sk2
));
1092 conc
= NodeManager::currentNM()->mkNode(kind::AND
, s12
, s1nz
, s2nz
, s1inr
, s2inrs
);
1093 conc
= NodeManager::currentNM()->mkNode(kind::OR
, se
, sinr
, conc
);
1098 Trace("strings-regexp") << "Unsupported term: " << r
<< " in simplifyPRegExp." << std::endl
;
1099 Assert( false, "Unsupported Term" );
1102 conc
= Rewriter::rewrite( conc
);
1103 new_nodes
.push_back( conc
);
1104 d_simpl_cache
[p
] = conc
;
1108 void RegExpOpr::getCharSet( Node r
, std::set
<unsigned> &pcset
, SetNodes
&pvset
) {
1109 std::map
< Node
, std::pair
< std::set
<unsigned>, SetNodes
> >::const_iterator itr
= d_cset_cache
.find(r
);
1110 if(itr
!= d_cset_cache
.end()) {
1111 pcset
.insert((itr
->second
).first
.begin(), (itr
->second
).first
.end());
1112 pvset
.insert((itr
->second
).second
.begin(), (itr
->second
).second
.end());
1114 std::set
<unsigned> cset
;
1116 int k
= r
.getKind();
1118 case kind::REGEXP_EMPTY
: {
1121 case kind::REGEXP_SIGMA
: {
1122 for(unsigned i
=0; i
<d_card
; i
++) {
1127 case kind::STRING_TO_REGEXP
: {
1128 Node st
= Rewriter::rewrite(r
[0]);
1130 CVC4::String s
= st
.getConst
< CVC4::String
>();
1131 s
.getCharSet( cset
);
1132 } else if(st
.getKind() == kind::VARIABLE
) {
1135 for(unsigned i
=0; i
<st
.getNumChildren(); i
++) {
1136 if(st
[i
].isConst()) {
1137 CVC4::String s
= st
[i
].getConst
< CVC4::String
>();
1138 s
.getCharSet( cset
);
1140 vset
.insert( st
[i
] );
1146 case kind::REGEXP_CONCAT
: {
1147 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
1148 getCharSet(r
[i
], cset
, vset
);
1152 case kind::REGEXP_UNION
: {
1153 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
1154 getCharSet(r
[i
], cset
, vset
);
1158 case kind::REGEXP_INTER
: {
1159 //TODO: Overapproximation for now
1160 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
1161 getCharSet(r
[i
], cset
, vset
);
1165 case kind::REGEXP_STAR
: {
1166 getCharSet(r
[0], cset
, vset
);
1170 Trace("strings-regexp") << "Unsupported term: " << r
<< " in getCharSet." << std::endl
;
1171 Assert( false, "Unsupported Term" );
1174 pcset
.insert(cset
.begin(), cset
.end());
1175 pvset
.insert(vset
.begin(), vset
.end());
1176 std::pair
< std::set
<unsigned>, SetNodes
> p(cset
, vset
);
1177 d_cset_cache
[r
] = p
;
1179 Trace("regexp-cset") << "CSET( " << mkString(r
) << " ) = { ";
1180 for(std::set
<unsigned>::const_iterator itr
= cset
.begin();
1181 itr
!= cset
.end(); itr
++) {
1182 Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr
) << ",";
1184 Trace("regexp-cset") << " }" << std::endl
;
1188 bool RegExpOpr::isPairNodesInSet(std::set
< PairNodes
> &s
, Node n1
, Node n2
) {
1189 for(std::set
< PairNodes
>::const_iterator itr
= s
.begin();
1190 itr
!= s
.end(); ++itr
) {
1191 if(itr
->first
== n1
&& itr
->second
== n2
||
1192 itr
->first
== n2
&& itr
->second
== n1
) {
1199 Node
RegExpOpr::intersectInternal( Node r1
, Node r2
, std::map
< unsigned, std::set
< PairNodes
> > cache
, bool &spflag
) {
1200 Trace("regexp-intersect") << "Starting INTERSECT:\n "<< mkString(r1
) << ",\n " << mkString(r2
) << std::endl
;
1203 return Node::null();
1205 std::pair
< Node
, Node
> p(r1
, r2
);
1206 std::map
< std::pair
< Node
, Node
>, Node
>::const_iterator itr
= d_inter_cache
.find(p
);
1208 if(itr
!= d_inter_cache
.end()) {
1209 rNode
= itr
->second
;
1213 } else if(r1
== d_emptyRegexp
|| r2
== d_emptyRegexp
) {
1214 rNode
= d_emptyRegexp
;
1215 } else if(r1
== d_emptySingleton
|| r2
== d_emptySingleton
) {
1217 int r
= delta((r1
== d_emptySingleton
? r2
: r1
), exp
);
1222 rNode
= d_emptySingleton
;
1224 rNode
= d_emptyRegexp
;
1227 std::set
< unsigned > cset
, cset2
;
1228 std::set
< Node
> vset
, vset2
;
1229 getCharSet(r1
, cset
, vset
);
1230 getCharSet(r2
, cset2
, vset2
);
1231 if(vset
.empty() && vset2
.empty()) {
1233 firstChars(r1
, cset
, vset
);
1234 std::vector
< Node
> vec_nodes
;
1236 int flag
= delta(r1
, delta_exp
);
1237 int flag2
= delta(r2
, delta_exp
);
1238 if(flag
!= 2 && flag2
!= 2) {
1239 if(flag
== 1 && flag2
== 1) {
1240 vec_nodes
.push_back(d_emptySingleton
);
1246 if(Trace
.isOn("regexp-debug")) {
1247 Trace("regexp-debug") << "Try CSET( " << cset
.size() << " ) = ";
1248 for(std::set
<unsigned>::const_iterator itr
= cset
.begin();
1249 itr
!= cset
.end(); itr
++) {
1250 Trace("regexp-debug") << *itr
<< ", ";
1252 Trace("regexp-debug") << std::endl
;
1254 for(std::set
<unsigned>::const_iterator itr
= cset
.begin();
1255 itr
!= cset
.end(); itr
++) {
1256 CVC4::String
c( CVC4::String::convertUnsignedIntToChar(*itr
) );
1257 if(!isPairNodesInSet(cache
[ *itr
], r1
, r2
)) {
1258 Node r1l
= derivativeSingle(r1
, c
);
1259 Node r2l
= derivativeSingle(r2
, c
);
1260 std::map
< unsigned, std::set
< PairNodes
> > cache2(cache
);
1261 PairNodes
p(r1l
, r2l
);
1262 cache2
[ *itr
].insert( p
);
1263 Node rt
= intersectInternal(r1l
, r2l
, cache2
, spflag
);
1266 return Node::null();
1268 rt
= Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
,
1269 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst(c
)), rt
) );
1270 vec_nodes
.push_back(rt
);
1273 rNode
= vec_nodes
.size()==0 ? d_emptyRegexp
: vec_nodes
.size()==1 ? vec_nodes
[0] :
1274 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vec_nodes
);
1275 rNode
= Rewriter::rewrite( rNode
);
1277 //TODO: non-empty var set
1281 d_inter_cache
[p
] = rNode
;
1283 Trace("regexp-intersect") << "INTERSECT( " << mkString(r1
) << ", " << mkString(r2
) << " ) = " << mkString(rNode
) << std::endl
;
1287 bool RegExpOpr::containC2(unsigned cnt
, Node n
) {
1288 if(n
.getKind() == kind::REGEXP_RV
) {
1289 Assert(n
[0].getConst
<Rational
>() <= RMAXINT
, "Exceeded LONG_MAX in RegExpOpr::containC2");
1290 unsigned y
= n
[0].getConst
<Rational
>().getNumerator().toUnsignedInt();
1292 } else if(n
.getKind() == kind::REGEXP_CONCAT
) {
1293 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1294 if(containC2(cnt
, n
[i
])) {
1298 } else if(n
.getKind() == kind::REGEXP_STAR
) {
1299 return containC2(cnt
, n
[0]);
1300 } else if(n
.getKind() == kind::REGEXP_UNION
) {
1301 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1302 if(containC2(cnt
, n
[i
])) {
1309 Node
RegExpOpr::convert1(unsigned cnt
, Node n
) {
1310 Trace("regexp-debug") << "Converting " << n
<< " at " << cnt
<< "... " << std::endl
;
1312 convert2(cnt
, n
, r1
, r2
);
1313 Trace("regexp-debug") << "... getting r1=" << r1
<< ", and r2=" << r2
<< std::endl
;
1314 Node ret
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
,
1315 NodeManager::currentNM()->mkNode(kind::REGEXP_STAR
, r1
), r2
);
1316 ret
= Rewriter::rewrite( ret
);
1317 Trace("regexp-debug") << "... done convert at " << cnt
<< ", with return " << ret
<< std::endl
;
1320 void RegExpOpr::convert2(unsigned cnt
, Node n
, Node
&r1
, Node
&r2
) {
1321 if(n
== d_emptyRegexp
) {
1324 } else if(n
== d_emptySingleton
) {
1325 r1
= d_emptySingleton
;
1326 r2
= d_emptySingleton
;
1327 } else if(n
.getKind() == kind::REGEXP_RV
) {
1328 Assert(n
[0].getConst
<Rational
>() <= RMAXINT
, "Exceeded LONG_MAX in RegExpOpr::containC2");
1329 unsigned y
= n
[0].getConst
<Rational
>().getNumerator().toUnsignedInt();
1330 r1
= d_emptySingleton
;
1336 } else if(n
.getKind() == kind::REGEXP_CONCAT
) {
1338 //convert2 x (r@(Seq l r1))
1339 // | contains x r1 = let (r2,r3) = convert2 x r1
1340 // in (Seq l r2, r3)
1341 // | otherwise = (Empty, r)
1343 std::vector
<Node
> vr1
, vr2
;
1344 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1345 if(containC2(cnt
, n
[i
])) {
1347 convert2(cnt
, n
[i
], t1
, t2
);
1349 r1
= vr1
.size()==0 ? d_emptyRegexp
: vr1
.size()==1 ? vr1
[0] :
1350 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vr1
);
1352 for( unsigned j
=i
+1; j
<n
.getNumChildren(); j
++ ) {
1353 vr2
.push_back(n
[j
]);
1355 r2
= vr2
.size()==0 ? d_emptyRegexp
: vr2
.size()==1 ? vr2
[0] :
1356 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vr2
);
1360 vr1
.push_back(n
[i
]);
1364 r1
= d_emptySingleton
;
1367 } else if(n
.getKind() == kind::REGEXP_UNION
) {
1368 std::vector
<Node
> vr1
, vr2
;
1369 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1371 convert2(cnt
, n
[i
], t1
, t2
);
1375 r1
= NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vr1
);
1376 r2
= NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vr2
);
1377 } else if(n
.getKind() == kind::STRING_TO_REGEXP
) {
1378 r1
= d_emptySingleton
;
1384 Node
RegExpOpr::intersectInternal2( Node r1
, Node r2
, std::map
< PairNodes
, Node
> cache
, bool &spflag
, unsigned cnt
) {
1385 Trace("regexp-intersect") << "Starting INTERSECT:\n "<< mkString(r1
) << ",\n " << mkString(r2
) << std::endl
;
1386 //if(Trace.isOn("regexp-debug")) {
1387 // Trace("regexp-debug") << "... with cache:\n";
1388 // for(std::map< PairNodes, Node >::const_iterator itr=cache.begin();
1389 // itr!=cache.end();itr++) {
1390 // Trace("regexp-debug") << "(" << itr->first.first << "," << itr->first.second << ")->" << itr->second << std::endl;
1395 return Node::null();
1397 std::pair
< Node
, Node
> p(r1
, r2
);
1398 std::map
< std::pair
< Node
, Node
>, Node
>::const_iterator itr
= d_inter_cache
.find(p
);
1400 if(itr
!= d_inter_cache
.end()) {
1401 rNode
= itr
->second
;
1403 if(r1
== d_emptyRegexp
|| r2
== d_emptyRegexp
) {
1404 rNode
= d_emptyRegexp
;
1405 } else if(r1
== d_emptySingleton
|| r2
== d_emptySingleton
) {
1407 int r
= delta((r1
== d_emptySingleton
? r2
: r1
), exp
);
1412 rNode
= d_emptySingleton
;
1414 rNode
= d_emptyRegexp
;
1416 } else if(r1
== r2
) {
1417 rNode
= convert1(cnt
, r1
);
1419 PairNodes
p(r1
, r2
);
1420 std::map
< PairNodes
, Node
>::const_iterator itrcache
= cache
.find(p
);
1421 if(itrcache
!= cache
.end()) {
1422 rNode
= itrcache
->second
;
1424 if(checkConstRegExp(r1
) && checkConstRegExp(r2
)) {
1425 std::vector
< unsigned > cset
;
1426 std::set
< unsigned > cset1
, cset2
;
1427 std::set
< Node
> vset1
, vset2
;
1428 firstChars(r1
, cset1
, vset1
);
1429 firstChars(r2
, cset2
, vset2
);
1430 std::set_intersection(cset1
.begin(), cset1
.end(), cset2
.begin(), cset1
.end(),
1431 std::inserter(cset
, cset
.begin()));
1432 std::vector
< Node
> vec_nodes
;
1434 int flag
= delta(r1
, delta_exp
);
1435 int flag2
= delta(r2
, delta_exp
);
1436 if(flag
!= 2 && flag2
!= 2) {
1437 if(flag
== 1 && flag2
== 1) {
1438 vec_nodes
.push_back(d_emptySingleton
);
1444 if(Trace
.isOn("regexp-debug")) {
1445 Trace("regexp-debug") << "Try CSET( " << cset
.size() << " ) = ";
1446 for(std::vector
<unsigned>::const_iterator itr
= cset
.begin();
1447 itr
!= cset
.end(); itr
++) {
1448 CVC4::String
c( CVC4::String::convertUnsignedIntToChar(*itr
) );
1449 Trace("regexp-debug") << c
<< ", ";
1451 Trace("regexp-debug") << std::endl
;
1453 for(std::vector
<unsigned>::const_iterator itr
= cset
.begin();
1454 itr
!= cset
.end(); itr
++) {
1455 CVC4::String
c( CVC4::String::convertUnsignedIntToChar(*itr
) );
1456 Node r1l
= derivativeSingle(r1
, c
);
1457 Node r2l
= derivativeSingle(r2
, c
);
1458 std::map
< PairNodes
, Node
> cache2(cache
);
1459 PairNodes
p(r1
, r2
);
1460 cache2
[ p
] = NodeManager::currentNM()->mkNode(kind::REGEXP_RV
, NodeManager::currentNM()->mkConst(CVC4::Rational(cnt
)));
1461 Node rt
= intersectInternal2(r1l
, r2l
, cache2
, spflag
, cnt
+1);
1462 rt
= convert1(cnt
, rt
);
1465 return Node::null();
1467 rt
= Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
,
1468 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst(c
)), rt
) );
1469 vec_nodes
.push_back(rt
);
1471 rNode
= vec_nodes
.size()==0 ? d_emptyRegexp
: vec_nodes
.size()==1 ? vec_nodes
[0] :
1472 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vec_nodes
);
1473 rNode
= Rewriter::rewrite( rNode
);
1475 //TODO: non-empty var set
1480 d_inter_cache
[p
] = rNode
;
1482 Trace("regexp-intersect") << "End of INTERSECT( " << mkString(r1
) << ", " << mkString(r2
) << " ) = " << mkString(rNode
) << std::endl
;
1485 Node
RegExpOpr::intersect(Node r1
, Node r2
, bool &spflag
) {
1486 //std::map< unsigned, std::set< PairNodes > > cache;
1487 std::map
< PairNodes
, Node
> cache
;
1488 if(checkConstRegExp(r1
) && checkConstRegExp(r2
)) {
1489 return intersectInternal2(r1
, r2
, cache
, spflag
, 1);
1492 return Node::null();
1496 Node
RegExpOpr::complement(Node r
, int &ret
) {
1499 if(d_compl_cache
.find(r
) != d_compl_cache
.end()) {
1500 rNode
= d_compl_cache
[r
].first
;
1501 ret
= d_compl_cache
[r
].second
;
1503 if(r
== d_emptyRegexp
) {
1504 rNode
= d_sigma_star
;
1505 } else if(r
== d_emptySingleton
) {
1506 rNode
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, d_sigma
, d_sigma_star
);
1507 } else if(!checkConstRegExp(r
)) {
1508 //TODO: var to be extended
1511 std::set
<unsigned> cset
;
1513 firstChars(r
, cset
, vset
);
1514 std::vector
< Node
> vec_nodes
;
1515 for(unsigned i
=0; i
<d_card
; i
++) {
1516 CVC4::String c
= CVC4::String::convertUnsignedIntToChar(i
);
1517 Node n
= NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst(c
));
1519 if(cset
.find(i
) == cset
.end()) {
1523 derivativeS(r
, c
, r2
);
1527 r2
= complement(r2
, rt
);
1530 n
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, n
, r2
));
1531 vec_nodes
.push_back(n
);
1533 rNode
= vec_nodes
.size()==0? d_emptyRegexp
: vec_nodes
.size()==1? vec_nodes
[0] :
1534 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vec_nodes
);
1536 rNode
= Rewriter::rewrite(rNode
);
1537 std::pair
< Node
, int > p(rNode
, ret
);
1538 d_compl_cache
[r
] = p
;
1540 Trace("regexp-compl") << "COMPL( " << mkString(r
) << " ) = " << mkString(rNode
) << ", ret=" << ret
<< std::endl
;
1544 void RegExpOpr::splitRegExp(Node r
, std::vector
< PairNodes
> &pset
) {
1545 Assert(checkConstRegExp(r
));
1546 if(d_split_cache
.find(r
) != d_split_cache
.end()) {
1547 pset
= d_split_cache
[r
];
1549 switch( r
.getKind() ) {
1550 case kind::REGEXP_EMPTY
: {
1553 case kind::REGEXP_OPT
: {
1554 PairNodes
tmp(d_emptySingleton
, d_emptySingleton
);
1555 pset
.push_back(tmp
);
1557 case kind::REGEXP_RANGE
:
1558 case kind::REGEXP_SIGMA
: {
1559 PairNodes
tmp1(d_emptySingleton
, r
);
1560 PairNodes
tmp2(r
, d_emptySingleton
);
1561 pset
.push_back(tmp1
);
1562 pset
.push_back(tmp2
);
1565 case kind::STRING_TO_REGEXP
: {
1566 Assert(r
[0].isConst());
1567 CVC4::String s
= r
[0].getConst
< CVC4::String
>();
1568 PairNodes
tmp1(d_emptySingleton
, r
);
1569 pset
.push_back(tmp1
);
1570 for(unsigned i
=1; i
<s
.size(); i
++) {
1571 CVC4::String s1
= s
.substr(0, i
);
1572 CVC4::String s2
= s
.substr(i
);
1573 Node n1
= NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst(s1
));
1574 Node n2
= NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst(s2
));
1575 PairNodes
tmp3(n1
, n2
);
1576 pset
.push_back(tmp3
);
1578 PairNodes
tmp2(r
, d_emptySingleton
);
1579 pset
.push_back(tmp2
);
1582 case kind::REGEXP_CONCAT
: {
1583 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
1584 std::vector
< PairNodes
> tset
;
1585 splitRegExp(r
[i
], tset
);
1586 std::vector
< Node
> hvec
;
1587 std::vector
< Node
> tvec
;
1588 for(unsigned j
=0; j
<=i
; j
++) {
1589 hvec
.push_back(r
[j
]);
1591 for(unsigned j
=i
; j
<r
.getNumChildren(); j
++) {
1592 tvec
.push_back(r
[j
]);
1594 for(unsigned j
=0; j
<tset
.size(); j
++) {
1595 hvec
[i
] = tset
[j
].first
;
1596 tvec
[0] = tset
[j
].second
;
1597 Node r1
= Rewriter::rewrite( hvec
.size()==1?hvec
[0]:NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, hvec
) );
1598 Node r2
= Rewriter::rewrite( tvec
.size()==1?tvec
[0]:NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, tvec
) );
1599 PairNodes
tmp2(r1
, r2
);
1600 pset
.push_back(tmp2
);
1605 case kind::REGEXP_UNION
: {
1606 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1607 std::vector
< PairNodes
> tset
;
1608 splitRegExp(r
[i
], tset
);
1609 pset
.insert(pset
.end(), tset
.begin(), tset
.end());
1613 case kind::REGEXP_INTER
: {
1614 bool spflag
= false;
1616 for(unsigned i
=1; i
<r
.getNumChildren(); i
++) {
1617 tmp
= intersect(tmp
, r
[i
], spflag
);
1619 splitRegExp(tmp
, pset
);
1622 case kind::REGEXP_STAR
: {
1623 std::vector
< PairNodes
> tset
;
1624 splitRegExp(r
[0], tset
);
1625 PairNodes
tmp1(d_emptySingleton
, d_emptySingleton
);
1626 pset
.push_back(tmp1
);
1627 for(unsigned i
=0; i
<tset
.size(); i
++) {
1628 Node r1
= tset
[i
].first
==d_emptySingleton
? r
: NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, r
, tset
[i
].first
);
1629 Node r2
= tset
[i
].second
==d_emptySingleton
? r
: NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, tset
[i
].second
, r
);
1630 PairNodes
tmp2(r1
, r2
);
1631 pset
.push_back(tmp2
);
1635 case kind::REGEXP_PLUS
: {
1636 std::vector
< PairNodes
> tset
;
1637 splitRegExp(r
[0], tset
);
1638 for(unsigned i
=0; i
<tset
.size(); i
++) {
1639 Node r1
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, r
, tset
[i
].first
);
1640 Node r2
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, tset
[i
].second
, r
);
1641 PairNodes
tmp2(r1
, r2
);
1642 pset
.push_back(tmp2
);
1647 Trace("strings-error") << "Unsupported term: " << r
<< " in splitRegExp." << std::endl
;
1649 //return Node::null();
1652 d_split_cache
[r
] = pset
;
1657 std::string
RegExpOpr::niceChar( Node r
) {
1659 std::string s
= r
.getConst
<CVC4::String
>().toString() ;
1660 return s
== "" ? "{E}" : ( s
== " " ? "{ }" : s
.size()>1? "("+s
+")" : s
);
1662 std::string ss
= "$" + r
.toString();
1666 std::string
RegExpOpr::mkString( Node r
) {
1671 int k
= r
.getKind();
1673 case kind::REGEXP_EMPTY
: {
1677 case kind::REGEXP_SIGMA
: {
1681 case kind::STRING_TO_REGEXP
: {
1682 retStr
+= niceChar( r
[0] );
1685 case kind::REGEXP_CONCAT
: {
1687 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1688 //if(i != 0) retStr += ".";
1689 retStr
+= mkString( r
[i
] );
1694 case kind::REGEXP_UNION
: {
1699 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1700 if(i
!= 0) retStr
+= "|";
1701 retStr
+= mkString( r
[i
] );
1707 case kind::REGEXP_INTER
: {
1709 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1710 if(i
!= 0) retStr
+= "&";
1711 retStr
+= mkString( r
[i
] );
1716 case kind::REGEXP_STAR
: {
1717 retStr
+= mkString( r
[0] );
1721 case kind::REGEXP_PLUS
: {
1722 retStr
+= mkString( r
[0] );
1726 case kind::REGEXP_OPT
: {
1727 retStr
+= mkString( r
[0] );
1731 case kind::REGEXP_RANGE
: {
1733 retStr
+= niceChar( r
[0] );
1735 retStr
+= niceChar( r
[1] );
1739 case kind::REGEXP_RV
: {
1741 retStr
+= r
[0].getConst
<Rational
>().getNumerator().toString();
1746 Trace("strings-error") << "Unsupported term: " << r
<< " in RegExp." << std::endl
;
1748 //return Node::null();
1755 }/* CVC4::theory::strings namespace */
1756 }/* CVC4::theory namespace */
1757 }/* CVC4 namespace */