1 /********************* */
2 /*! \file regexp_operation.cpp
4 ** Original author: Tianyi Liang
5 ** Major contributors: Morgan Deters
6 ** Minor contributors (to current version): none
7 ** This file is part of the CVC4 project.
8 ** Copyright (c) 2009-2014 New York University and The University of Iowa
9 ** See the file COPYING in the top-level source directory for licensing
10 ** information.\endverbatim
12 ** \brief Symbolic Regular Expresion Operations
14 ** Symbolic Regular Expresion Operations
17 #include "theory/strings/regexp_operation.h"
18 #include "expr/kind.h"
24 RegExpOpr::RegExpOpr() {
25 d_emptyString
= NodeManager::currentNM()->mkConst( ::CVC4::String("") );
26 d_true
= NodeManager::currentNM()->mkConst( true );
27 d_false
= NodeManager::currentNM()->mkConst( false );
28 d_zero
= NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
29 d_one
= NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
30 d_emptySingleton
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, d_emptyString
);
31 std::vector
< Node
> nvec
;
32 d_emptyRegexp
= NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY
, nvec
);
33 d_sigma
= NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA
, nvec
);
34 d_sigma_star
= NodeManager::currentNM()->mkNode( kind::REGEXP_STAR
, d_sigma
);
38 int RegExpOpr::gcd ( int a
, int b
) {
41 c
= a
; a
= b
%a
; b
= c
;
46 bool RegExpOpr::checkConstRegExp( Node r
) {
47 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r
) << std::endl
;
49 if( d_cstre_cache
.find( r
) != d_cstre_cache
.end() ) {
50 ret
= d_cstre_cache
[r
];
52 if(r
.getKind() == kind::STRING_TO_REGEXP
) {
53 Node tmp
= Rewriter::rewrite( r
[0] );
56 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
57 if(!checkConstRegExp(r
[i
])) {
62 d_cstre_cache
[r
] = ret
;
67 // 0-unknown, 1-yes, 2-no
68 int RegExpOpr::delta( Node r
, Node
&exp
) {
69 Trace("regexp-delta") << "RegExp-Delta starts with " << mkString( r
) << std::endl
;
71 if( d_delta_cache
.find( r
) != d_delta_cache
.end() ) {
72 ret
= d_delta_cache
[r
].first
;
73 exp
= d_delta_cache
[r
].second
;
77 case kind::REGEXP_EMPTY
: {
81 case kind::REGEXP_SIGMA
: {
85 case kind::STRING_TO_REGEXP
: {
86 Node tmp
= Rewriter::rewrite(r
[0]);
88 if(tmp
== d_emptyString
) {
95 if(tmp
.getKind() == kind::STRING_CONCAT
) {
96 for(unsigned i
=0; i
<tmp
.getNumChildren(); i
++) {
97 if(tmp
[i
].isConst()) {
104 exp
= r
[0].eqNode(d_emptyString
);
109 case kind::REGEXP_CONCAT
: {
111 std::vector
< Node
> vec_nodes
;
112 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
114 int tmp
= delta( r
[i
], exp2
);
118 } else if(tmp
== 0) {
119 vec_nodes
.push_back( exp2
);
127 exp
= vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode(kind::AND
, vec_nodes
);
132 case kind::REGEXP_UNION
: {
134 std::vector
< Node
> vec_nodes
;
135 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
137 int tmp
= delta( r
[i
], exp2
);
141 } else if(tmp
== 0) {
142 vec_nodes
.push_back( exp2
);
150 exp
= vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode(kind::OR
, vec_nodes
);
155 case kind::REGEXP_INTER
: {
157 std::vector
< Node
> vec_nodes
;
158 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
160 int tmp
= delta( r
[i
], exp2
);
164 } else if(tmp
== 0) {
165 vec_nodes
.push_back( exp2
);
173 exp
= vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode(kind::AND
, vec_nodes
);
178 case kind::REGEXP_STAR
: {
182 case kind::REGEXP_PLUS
: {
183 ret
= delta( r
[0], exp
);
186 case kind::REGEXP_OPT
: {
190 case kind::REGEXP_RANGE
: {
195 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in delta of RegExp." << std::endl
;
197 //return Node::null();
201 exp
= Rewriter::rewrite(exp
);
203 std::pair
< int, Node
> p(ret
, exp
);
204 d_delta_cache
[r
] = p
;
206 Trace("regexp-delta") << "RegExp-Delta returns : " << ret
<< std::endl
;
210 // 0-unknown, 1-yes, 2-no
211 int RegExpOpr::derivativeS( Node r
, CVC4::String c
, Node
&retNode
) {
212 Assert( c
.size() < 2 );
213 Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r
) << " }, c=" << c
<< std::endl
;
216 retNode
= d_emptyRegexp
;
218 PairNodeStr dv
= std::make_pair( r
, c
);
219 if( d_deriv_cache
.find( dv
) != d_deriv_cache
.end() ) {
220 retNode
= d_deriv_cache
[dv
].first
;
221 ret
= d_deriv_cache
[dv
].second
;
222 } else if( c
.isEmptyString() ) {
224 ret
= delta( r
, expNode
);
226 retNode
= NodeManager::currentNM()->mkNode(kind::ITE
, expNode
, r
, d_emptyRegexp
);
227 } else if(ret
== 1) {
230 std::pair
< Node
, int > p(retNode
, ret
);
231 d_deriv_cache
[dv
] = p
;
233 switch( r
.getKind() ) {
234 case kind::REGEXP_EMPTY
: {
238 case kind::REGEXP_SIGMA
: {
239 retNode
= d_emptySingleton
;
242 case kind::STRING_TO_REGEXP
: {
243 Node tmp
= Rewriter::rewrite(r
[0]);
245 if(tmp
== d_emptyString
) {
248 if(tmp
.getConst
< CVC4::String
>().getFirstChar() == c
.getFirstChar()) {
249 retNode
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
,
250 tmp
.getConst
< CVC4::String
>().size() == 1 ? d_emptyString
: NodeManager::currentNM()->mkConst( tmp
.getConst
< CVC4::String
>().substr(1) ) );
258 if(tmp
.getKind() == kind::STRING_CONCAT
) {
261 if(t2
.getConst
< CVC4::String
>().getFirstChar() == c
.getFirstChar()) {
262 Node n
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
,
263 tmp
.getConst
< CVC4::String
>().size() == 1 ? d_emptyString
: NodeManager::currentNM()->mkConst( tmp
.getConst
< CVC4::String
>().substr(1) ) );
264 std::vector
< Node
> vec_nodes
;
265 vec_nodes
.push_back(n
);
266 for(unsigned i
=1; i
<tmp
.getNumChildren(); i
++) {
267 vec_nodes
.push_back(tmp
[i
]);
269 retNode
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vec_nodes
);
276 std::vector
< Node
> vec_nodes
;
277 for(unsigned i
=1; i
<tmp
.getNumChildren(); i
++) {
278 vec_nodes
.push_back(tmp
[i
]);
280 rest
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vec_nodes
);
284 Node sk
= NodeManager::currentNM()->mkSkolem( "rsp", NodeManager::currentNM()->stringType(), "Split RegExp" );
285 retNode
= NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, sk
);
287 retNode
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, retNode
, rest
));
289 Node exp
= tmp
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT
,
290 NodeManager::currentNM()->mkConst(c
), sk
));
291 retNode
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE
, exp
, retNode
, d_emptyRegexp
));
296 case kind::REGEXP_CONCAT
: {
297 std::vector
< Node
> vec_nodes
;
298 std::vector
< Node
> delta_nodes
;
300 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
303 int rt
= derivativeS(r
[i
], c
, dc
);
308 std::vector
< Node
> vec_nodes2
;
309 if(dc
!= d_emptySingleton
) {
310 vec_nodes2
.push_back( dc
);
312 for(unsigned j
=i
+1; j
<r
.getNumChildren(); ++j
) {
313 if(r
[j
] != d_emptySingleton
) {
314 vec_nodes2
.push_back( r
[j
] );
317 Node tmp
= vec_nodes2
.size()==0 ? d_emptySingleton
:
318 vec_nodes2
.size()==1 ? vec_nodes2
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, vec_nodes2
);
319 if(dnode
!= d_true
) {
320 tmp
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE
, dnode
, tmp
, d_emptyRegexp
));
323 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), tmp
) == vec_nodes
.end()) {
324 vec_nodes
.push_back( tmp
);
328 int rt2
= delta( r
[i
], exp3
);
330 dnode
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND
, dnode
, exp3
));
331 } else if( rt2
== 2 ) {
335 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
336 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
337 if(retNode
== d_emptyRegexp
) {
342 case kind::REGEXP_UNION
: {
343 std::vector
< Node
> vec_nodes
;
344 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
346 int rt
= derivativeS(r
[i
], c
, dc
);
351 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
352 vec_nodes
.push_back( dc
);
355 Trace("regexp-derive") << "RegExp-derive OR R[" << i
<< "]{ " << mkString(r
[i
]) << " returns " << mkString(dc
) << std::endl
;
357 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
358 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
359 if(retNode
== d_emptyRegexp
) {
364 case kind::REGEXP_INTER
: {
366 bool flag_sg
= false;
367 std::vector
< Node
> vec_nodes
;
368 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
370 int rt
= derivativeS(r
[i
], c
, dc
);
377 if(dc
== d_sigma_star
) {
380 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
381 vec_nodes
.push_back( dc
);
386 if(vec_nodes
.size() == 0 && flag_sg
) {
387 retNode
= d_sigma_star
;
389 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
390 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER
, vec_nodes
) );
391 if(retNode
== d_emptyRegexp
) {
396 retNode
= d_emptyRegexp
;
401 case kind::REGEXP_STAR
: {
403 ret
= derivativeS(r
[0], c
, dc
);
404 retNode
= dc
==d_emptyRegexp
? dc
: (dc
==d_emptySingleton
? r
: NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, dc
, r
));
408 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in derivative of RegExp." << std::endl
;
409 Assert( false, "Unsupported Term" );
412 if(retNode
!= d_emptyRegexp
) {
413 retNode
= Rewriter::rewrite( retNode
);
415 std::pair
< Node
, int > p(retNode
, ret
);
416 d_deriv_cache
[dv
] = p
;
419 Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode
) << std::endl
;
423 Node
RegExpOpr::derivativeSingle( Node r
, CVC4::String c
) {
424 Assert( c
.size() < 2 );
425 Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r
) << " }, c=" << c
<< std::endl
;
426 Node retNode
= d_emptyRegexp
;
427 PairNodeStr dv
= std::make_pair( r
, c
);
428 if( d_dv_cache
.find( dv
) != d_dv_cache
.end() ) {
429 retNode
= d_dv_cache
[dv
];
430 } else if( c
.isEmptyString() ){
432 int tmp
= delta( r
, exp
);
435 retNode
= d_emptyRegexp
;
436 } else if(tmp
== 1) {
439 retNode
= d_emptyRegexp
;
444 case kind::REGEXP_EMPTY
: {
445 retNode
= d_emptyRegexp
;
448 case kind::REGEXP_SIGMA
: {
449 retNode
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, d_emptyString
);
452 case kind::STRING_TO_REGEXP
: {
454 if(r
[0] == d_emptyString
) {
455 retNode
= d_emptyRegexp
;
457 if(r
[0].getConst
< CVC4::String
>().getFirstChar() == c
.getFirstChar()) {
458 retNode
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
,
459 r
[0].getConst
< CVC4::String
>().size() == 1 ? d_emptyString
: NodeManager::currentNM()->mkConst( r
[0].getConst
< CVC4::String
>().substr(1) ) );
461 retNode
= d_emptyRegexp
;
466 retNode
= d_emptyRegexp
;
470 case kind::REGEXP_CONCAT
: {
471 Node rees
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, d_emptyString
);
472 std::vector
< Node
> vec_nodes
;
473 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
474 Node dc
= derivativeSingle(r
[i
], c
);
475 if(dc
!= d_emptyRegexp
) {
476 std::vector
< Node
> vec_nodes2
;
478 vec_nodes2
.push_back( dc
);
480 for(unsigned j
=i
+1; j
<r
.getNumChildren(); ++j
) {
482 vec_nodes2
.push_back( r
[j
] );
485 Node tmp
= vec_nodes2
.size()==0 ? rees
:
486 vec_nodes2
.size()==1 ? vec_nodes2
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, vec_nodes2
);
487 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), tmp
) == vec_nodes
.end()) {
488 vec_nodes
.push_back( tmp
);
492 if( delta( r
[i
], exp
) != 1 ) {
496 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
497 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
500 case kind::REGEXP_UNION
: {
501 std::vector
< Node
> vec_nodes
;
502 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
503 Node dc
= derivativeSingle(r
[i
], c
);
504 if(dc
!= d_emptyRegexp
) {
505 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
506 vec_nodes
.push_back( dc
);
509 Trace("regexp-derive") << "RegExp-derive OR R[" << i
<< "]{ " << mkString(r
[i
]) << " returns " << mkString(dc
) << std::endl
;
511 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
512 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
515 case kind::REGEXP_INTER
: {
517 bool flag_sg
= false;
518 std::vector
< Node
> vec_nodes
;
519 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
520 Node dc
= derivativeSingle(r
[i
], c
);
521 if(dc
!= d_emptyRegexp
) {
522 if(dc
== d_sigma_star
) {
525 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
526 vec_nodes
.push_back( dc
);
535 if(vec_nodes
.size() == 0 && flag_sg
) {
536 retNode
= d_sigma_star
;
538 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
539 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER
, vec_nodes
) );
542 retNode
= d_emptyRegexp
;
546 case kind::REGEXP_STAR
: {
547 Node dc
= derivativeSingle(r
[0], c
);
548 if(dc
!= d_emptyRegexp
) {
549 retNode
= dc
==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, d_emptyString
) ? r
: NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, dc
, r
);
551 retNode
= d_emptyRegexp
;
556 //TODO: special sym: sigma, none, all
557 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in derivative of RegExp." << std::endl
;
558 Assert( false, "Unsupported Term" );
559 //return Node::null();
562 if(retNode
!= d_emptyRegexp
) {
563 retNode
= Rewriter::rewrite( retNode
);
565 d_dv_cache
[dv
] = retNode
;
567 Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode
) << std::endl
;
572 bool RegExpOpr::guessLength( Node r
, int &co
) {
575 case kind::STRING_TO_REGEXP
:
578 co
+= r
[0].getConst
< CVC4::String
>().size();
585 case kind::REGEXP_CONCAT
:
587 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
588 if(!guessLength( r
[i
], co
)) {
595 case kind::REGEXP_UNION
:
598 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
600 if(!guessLength( r
[i
], cop
)) {
606 g_co
= gcd(g_co
, cop
);
612 case kind::REGEXP_INTER
:
615 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
617 if(!guessLength( r
[i
], cop
)) {
623 g_co
= gcd(g_co
, cop
);
629 case kind::REGEXP_STAR
:
636 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in membership of RegExp." << std::endl
;
641 void RegExpOpr::firstChars( Node r
, std::set
<unsigned> &pcset
, SetNodes
&pvset
) {
642 std::map
< Node
, std::pair
< std::set
<unsigned>, SetNodes
> >::const_iterator itr
= d_fset_cache
.find(r
);
643 if(itr
!= d_fset_cache
.end()) {
644 pcset
.insert((itr
->second
).first
.begin(), (itr
->second
).first
.end());
645 pvset
.insert((itr
->second
).second
.begin(), (itr
->second
).second
.end());
647 std::set
<unsigned> cset
;
651 case kind::REGEXP_EMPTY
: {
654 case kind::REGEXP_SIGMA
: {
655 for(unsigned i
=0; i
<d_card
; i
++) {
660 case kind::STRING_TO_REGEXP
: {
661 Node st
= Rewriter::rewrite(r
[0]);
663 CVC4::String s
= st
.getConst
< CVC4::String
>();
667 } else if(st
.getKind() == kind::VARIABLE
) {
670 if(st
[0].isConst()) {
671 CVC4::String s
= st
[0].getConst
< CVC4::String
>();
674 vset
.insert( st
[0] );
679 case kind::REGEXP_CONCAT
: {
680 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
681 firstChars(r
[i
], cset
, vset
);
684 int r
= delta( n
, exp
);
691 case kind::REGEXP_UNION
: {
692 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
693 firstChars(r
[i
], cset
, vset
);
697 case kind::REGEXP_INTER
: {
698 //TODO: Overapproximation for now
699 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
700 firstChars(r
[i
], cset
, vset
);
704 case kind::REGEXP_STAR
: {
705 firstChars(r
[0], cset
, vset
);
709 Trace("strings-regexp") << "Unsupported term: " << r
<< " in getCharSet." << std::endl
;
710 Assert( false, "Unsupported Term" );
713 pcset
.insert(cset
.begin(), cset
.end());
714 pvset
.insert(vset
.begin(), vset
.end());
715 std::pair
< std::set
<unsigned>, SetNodes
> p(cset
, vset
);
718 Trace("regexp-fset") << "FSET( " << mkString(r
) << " ) = { ";
719 for(std::set
<unsigned>::const_iterator itr
= cset
.begin();
720 itr
!= cset
.end(); itr
++) {
721 Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr
) << ",";
723 Trace("regexp-fset") << " }" << std::endl
;
727 bool RegExpOpr::follow( Node r
, CVC4::String c
, std::vector
< char > &vec_chars
) {
730 case kind::STRING_TO_REGEXP
:
733 if(r
[0] != d_emptyString
) {
734 char t1
= r
[0].getConst
< CVC4::String
>().getFirstChar();
735 if(c
.isEmptyString()) {
736 vec_chars
.push_back( t1
);
739 char t2
= c
.getFirstChar();
744 vec_chars
.push_back( c
.substr(1,1).getFirstChar() );
746 vec_chars
.push_back( '\0' );
759 case kind::REGEXP_CONCAT
:
761 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
762 if( follow(r
[i
], c
, vec_chars
) ) {
763 if(vec_chars
[vec_chars
.size() - 1] == '\0') {
764 vec_chars
.pop_back();
765 c
= d_emptyString
.getConst
< CVC4::String
>();
771 vec_chars
.push_back( '\0' );
775 case kind::REGEXP_UNION
:
778 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
779 if( follow(r
[i
], c
, vec_chars
) ) {
786 case kind::REGEXP_INTER
:
788 std::vector
< char > vt2
;
789 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
790 std::vector
< char > v_tmp
;
791 if( !follow(r
[i
], c
, v_tmp
) ) {
794 std::vector
< char > vt3(vt2
);
796 std::set_intersection( vt3
.begin(), vt3
.end(), v_tmp
.begin(), v_tmp
.end(), vt2
.begin() );
797 if(vt2
.size() == 0) {
801 vec_chars
.insert( vec_chars
.end(), vt2
.begin(), vt2
.end() );
805 case kind::REGEXP_STAR
:
807 if(follow(r
[0], c
, vec_chars
)) {
808 if(vec_chars
[vec_chars
.size() - 1] == '\0') {
809 if(c
.isEmptyString()) {
812 vec_chars
.pop_back();
813 c
= d_emptyString
.getConst
< CVC4::String
>();
814 return follow(r
[0], c
, vec_chars
);
820 vec_chars
.push_back( '\0' );
826 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in delta of RegExp." << std::endl
;
827 //AlwaysAssert( false );
828 //return Node::null();
834 Node
RegExpOpr::mkAllExceptOne( char exp_c
) {
835 std::vector
< Node
> vec_nodes
;
836 for(char c
=d_char_start
; c
<=d_char_end
; ++c
) {
838 Node n
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst( ::CVC4::String( c
) ) );
839 vec_nodes
.push_back( n
);
842 return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
);
846 void RegExpOpr::simplify(Node t
, std::vector
< Node
> &new_nodes
, bool polarity
) {
847 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t
<< ", polarity=" << polarity
<< std::endl
;
848 Assert(t
.getKind() == kind::STRING_IN_REGEXP
);
849 Node str
= Rewriter::rewrite(t
[0]);
850 Node re
= Rewriter::rewrite(t
[1]);
852 simplifyPRegExp( str
, re
, new_nodes
);
854 simplifyNRegExp( str
, re
, new_nodes
);
856 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes
.size() << "):\n";
857 for(unsigned i
=0; i
<new_nodes
.size(); i
++) {
858 Trace("strings-regexp-simpl") << "\t" << new_nodes
[i
] << std::endl
;
861 void RegExpOpr::simplifyNRegExp( Node s
, Node r
, std::vector
< Node
> &new_nodes
) {
862 std::pair
< Node
, Node
> p(s
, r
);
863 std::map
< std::pair
< Node
, Node
>, Node
>::const_iterator itr
= d_simpl_neg_cache
.find(p
);
864 if(itr
!= d_simpl_neg_cache
.end()) {
865 new_nodes
.push_back( itr
->second
);
870 case kind::REGEXP_EMPTY
: {
874 case kind::REGEXP_SIGMA
: {
875 conc
= d_one
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
)).negate();
878 case kind::STRING_TO_REGEXP
: {
879 conc
= s
.eqNode(r
[0]).negate();
882 case kind::REGEXP_CONCAT
: {
883 //TODO: rewrite empty
884 Node lens
= NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
);
885 Node b1
= NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
886 Node b1v
= NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST
, b1
);
887 Node g1
= NodeManager::currentNM()->mkNode( kind::AND
, NodeManager::currentNM()->mkNode(kind::GEQ
, b1
, d_zero
),
888 NodeManager::currentNM()->mkNode( kind::GEQ
, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
), b1
) );
889 Node s1
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL
, s
, d_zero
, b1
));
890 Node s2
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL
, s
, b1
, NodeManager::currentNM()->mkNode(kind::MINUS
, lens
, b1
)));
891 Node s1r1
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s1
, r
[0]).negate();
892 if(r
[0].getKind() == kind::STRING_TO_REGEXP
) {
893 s1r1
= s1
.eqNode(r
[0][0]).negate();
894 } else if(r
[0].getKind() == kind::REGEXP_EMPTY
) {
898 if(r
.getNumChildren() > 2) {
899 std::vector
< Node
> nvec
;
900 for(unsigned i
=1; i
<r
.getNumChildren(); i
++) {
901 nvec
.push_back( r
[i
] );
903 r2
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, nvec
);
905 r2
= Rewriter::rewrite(r2
);
906 Node s2r2
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s2
, r2
).negate();
907 if(r2
.getKind() == kind::STRING_TO_REGEXP
) {
908 s2r2
= s2
.eqNode(r2
[0]).negate();
909 } else if(r2
.getKind() == kind::REGEXP_EMPTY
) {
913 conc
= NodeManager::currentNM()->mkNode(kind::OR
, s1r1
, s2r2
);
914 conc
= NodeManager::currentNM()->mkNode(kind::IMPLIES
, g1
, conc
);
915 conc
= NodeManager::currentNM()->mkNode(kind::FORALL
, b1v
, conc
);
918 case kind::REGEXP_UNION
: {
919 std::vector
< Node
> c_and
;
920 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
921 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
922 c_and
.push_back( r
[i
][0].eqNode(s
).negate() );
923 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
926 c_and
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]).negate());
929 conc
= c_and
.size() == 0 ? d_true
:
930 c_and
.size() == 1 ? c_and
[0] : NodeManager::currentNM()->mkNode(kind::AND
, c_and
);
933 case kind::REGEXP_INTER
: {
934 bool emptyflag
= false;
935 std::vector
< Node
> c_or
;
936 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
937 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
938 c_or
.push_back( r
[i
][0].eqNode(s
).negate() );
939 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
943 c_or
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]).negate());
949 conc
= c_or
.size() == 1 ? c_or
[0] : NodeManager::currentNM()->mkNode(kind::OR
, c_or
);
953 case kind::REGEXP_STAR
: {
954 if(s
== d_emptyString
) {
956 } else if(r
[0].getKind() == kind::REGEXP_EMPTY
) {
957 conc
= s
.eqNode(d_emptyString
).negate();
958 } else if(r
[0].getKind() == kind::REGEXP_SIGMA
) {
961 Node lens
= NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
);
962 Node sne
= s
.eqNode(d_emptyString
).negate();
963 Node b1
= NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
964 Node b1v
= NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST
, b1
);
965 Node g1
= NodeManager::currentNM()->mkNode( kind::AND
, NodeManager::currentNM()->mkNode(kind::GEQ
, b1
, d_one
),
966 NodeManager::currentNM()->mkNode( kind::GEQ
, lens
, b1
) );
968 Node s1
= NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL
, s
, d_zero
, b1
);
969 Node s2
= NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL
, s
, b1
, NodeManager::currentNM()->mkNode(kind::MINUS
, lens
, b1
));
970 Node s1r1
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s1
, r
[0]).negate();
971 Node s2r2
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s2
, r
).negate();
973 conc
= NodeManager::currentNM()->mkNode(kind::OR
, s1r1
, s2r2
);
974 conc
= NodeManager::currentNM()->mkNode(kind::IMPLIES
, g1
, conc
);
975 conc
= NodeManager::currentNM()->mkNode(kind::FORALL
, b1v
, conc
);
976 conc
= NodeManager::currentNM()->mkNode(kind::AND
, sne
, conc
);
981 Trace("strings-regexp") << "Unsupported term: " << r
<< " in simplifyNRegExp." << std::endl
;
982 Assert( false, "Unsupported Term" );
985 conc
= Rewriter::rewrite( conc
);
986 new_nodes
.push_back( conc
);
987 d_simpl_neg_cache
[p
] = conc
;
990 void RegExpOpr::simplifyPRegExp( Node s
, Node r
, std::vector
< Node
> &new_nodes
) {
991 std::pair
< Node
, Node
> p(s
, r
);
992 std::map
< std::pair
< Node
, Node
>, Node
>::const_iterator itr
= d_simpl_cache
.find(p
);
993 if(itr
!= d_simpl_cache
.end()) {
994 new_nodes
.push_back( itr
->second
);
999 case kind::REGEXP_EMPTY
: {
1003 case kind::REGEXP_SIGMA
: {
1004 conc
= d_one
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
));
1007 case kind::STRING_TO_REGEXP
: {
1008 conc
= s
.eqNode(r
[0]);
1011 case kind::REGEXP_CONCAT
: {
1012 std::vector
< Node
> nvec
;
1013 std::vector
< Node
> cc
;
1014 bool emptyflag
= false;
1015 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1016 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
1017 cc
.push_back( r
[i
][0] );
1018 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
1022 Node sk
= NodeManager::currentNM()->mkSkolem( "rc", s
.getType(), "created for regular expression concat" );
1023 Node lem
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk
, r
[i
]);
1024 nvec
.push_back(lem
);
1031 Node lem
= s
.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT
, cc
) );
1032 nvec
.push_back(lem
);
1033 conc
= nvec
.size() == 1 ? nvec
[0] : NodeManager::currentNM()->mkNode(kind::AND
, nvec
);
1037 case kind::REGEXP_UNION
: {
1038 std::vector
< Node
> c_or
;
1039 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1040 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
1041 c_or
.push_back( r
[i
][0].eqNode(s
) );
1042 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
1045 c_or
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]));
1048 conc
= c_or
.size() == 0 ? d_false
:
1049 c_or
.size() == 1 ? c_or
[0] : NodeManager::currentNM()->mkNode(kind::OR
, c_or
);
1052 case kind::REGEXP_INTER
: {
1053 std::vector
< Node
> c_and
;
1054 bool emptyflag
= false;
1055 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1056 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
1057 c_and
.push_back( r
[i
][0].eqNode(s
) );
1058 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
1062 c_and
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]));
1068 conc
= c_and
.size() == 1 ? c_and
[0] : NodeManager::currentNM()->mkNode(kind::AND
, c_and
);
1072 case kind::REGEXP_STAR
: {
1073 if(s
== d_emptyString
) {
1075 } else if(r
[0].getKind() == kind::REGEXP_EMPTY
) {
1076 conc
= s
.eqNode(d_emptyString
);
1077 } else if(r
[0].getKind() == kind::REGEXP_SIGMA
) {
1080 Node se
= s
.eqNode(d_emptyString
);
1081 Node sinr
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[0]);
1082 Node sk1
= NodeManager::currentNM()->mkSkolem( "rs", s
.getType(), "created for regular expression star" );
1083 Node sk2
= NodeManager::currentNM()->mkSkolem( "rs", s
.getType(), "created for regular expression star" );
1084 Node s1nz
= sk1
.eqNode(d_emptyString
).negate();
1085 Node s2nz
= sk2
.eqNode(d_emptyString
).negate();
1086 Node s1inr
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk1
, r
[0]);
1087 Node s2inrs
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk2
, r
);
1088 Node s12
= s
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT
, sk1
, sk2
));
1090 conc
= NodeManager::currentNM()->mkNode(kind::AND
, s12
, s1nz
, s2nz
, s1inr
, s2inrs
);
1091 conc
= NodeManager::currentNM()->mkNode(kind::OR
, se
, sinr
, conc
);
1096 Trace("strings-regexp") << "Unsupported term: " << r
<< " in simplifyPRegExp." << std::endl
;
1097 Assert( false, "Unsupported Term" );
1100 conc
= Rewriter::rewrite( conc
);
1101 new_nodes
.push_back( conc
);
1102 d_simpl_cache
[p
] = conc
;
1106 void RegExpOpr::getCharSet( Node r
, std::set
<unsigned> &pcset
, SetNodes
&pvset
) {
1107 std::map
< Node
, std::pair
< std::set
<unsigned>, SetNodes
> >::const_iterator itr
= d_cset_cache
.find(r
);
1108 if(itr
!= d_cset_cache
.end()) {
1109 pcset
.insert((itr
->second
).first
.begin(), (itr
->second
).first
.end());
1110 pvset
.insert((itr
->second
).second
.begin(), (itr
->second
).second
.end());
1112 std::set
<unsigned> cset
;
1114 int k
= r
.getKind();
1116 case kind::REGEXP_EMPTY
: {
1119 case kind::REGEXP_SIGMA
: {
1120 for(unsigned i
=0; i
<d_card
; i
++) {
1125 case kind::STRING_TO_REGEXP
: {
1126 Node st
= Rewriter::rewrite(r
[0]);
1128 CVC4::String s
= st
.getConst
< CVC4::String
>();
1129 s
.getCharSet( cset
);
1130 } else if(st
.getKind() == kind::VARIABLE
) {
1133 for(unsigned i
=0; i
<st
.getNumChildren(); i
++) {
1134 if(st
[i
].isConst()) {
1135 CVC4::String s
= st
[i
].getConst
< CVC4::String
>();
1136 s
.getCharSet( cset
);
1138 vset
.insert( st
[i
] );
1144 case kind::REGEXP_CONCAT
: {
1145 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
1146 getCharSet(r
[i
], cset
, vset
);
1150 case kind::REGEXP_UNION
: {
1151 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
1152 getCharSet(r
[i
], cset
, vset
);
1156 case kind::REGEXP_INTER
: {
1157 //TODO: Overapproximation for now
1158 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
1159 getCharSet(r
[i
], cset
, vset
);
1163 case kind::REGEXP_STAR
: {
1164 getCharSet(r
[0], cset
, vset
);
1168 Trace("strings-regexp") << "Unsupported term: " << r
<< " in getCharSet." << std::endl
;
1169 Assert( false, "Unsupported Term" );
1172 pcset
.insert(cset
.begin(), cset
.end());
1173 pvset
.insert(vset
.begin(), vset
.end());
1174 std::pair
< std::set
<unsigned>, SetNodes
> p(cset
, vset
);
1175 d_cset_cache
[r
] = p
;
1177 Trace("regexp-cset") << "CSET( " << mkString(r
) << " ) = { ";
1178 for(std::set
<unsigned>::const_iterator itr
= cset
.begin();
1179 itr
!= cset
.end(); itr
++) {
1180 Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr
) << ",";
1182 Trace("regexp-cset") << " }" << std::endl
;
1186 bool RegExpOpr::isPairNodesInSet(std::set
< PairNodes
> &s
, Node n1
, Node n2
) {
1187 for(std::set
< PairNodes
>::const_iterator itr
= s
.begin();
1188 itr
!= s
.end(); ++itr
) {
1189 if(itr
->first
== n1
&& itr
->second
== n2
||
1190 itr
->first
== n2
&& itr
->second
== n1
) {
1197 Node
RegExpOpr::intersectInternal( Node r1
, Node r2
, std::map
< unsigned, std::set
< PairNodes
> > cache
, bool &spflag
) {
1198 Trace("regexp-intersect") << "Starting INTERSECT:\n "<< mkString(r1
) << ",\n " << mkString(r2
) << std::endl
;
1201 return Node::null();
1203 std::pair
< Node
, Node
> p(r1
, r2
);
1204 std::map
< std::pair
< Node
, Node
>, Node
>::const_iterator itr
= d_inter_cache
.find(p
);
1206 if(itr
!= d_inter_cache
.end()) {
1207 rNode
= itr
->second
;
1211 } else if(r1
== d_emptyRegexp
|| r2
== d_emptyRegexp
) {
1212 rNode
= d_emptyRegexp
;
1213 } else if(r1
== d_emptySingleton
|| r2
== d_emptySingleton
) {
1215 int r
= delta((r1
== d_emptySingleton
? r2
: r1
), exp
);
1220 rNode
= d_emptySingleton
;
1222 rNode
= d_emptyRegexp
;
1225 std::set
< unsigned > cset
, cset2
;
1226 std::set
< Node
> vset
, vset2
;
1227 getCharSet(r1
, cset
, vset
);
1228 getCharSet(r2
, cset2
, vset2
);
1229 if(vset
.empty() && vset2
.empty()) {
1231 firstChars(r1
, cset
, vset
);
1232 std::vector
< Node
> vec_nodes
;
1234 int flag
= delta(r1
, delta_exp
);
1235 int flag2
= delta(r2
, delta_exp
);
1236 if(flag
!= 2 && flag2
!= 2) {
1237 if(flag
== 1 && flag2
== 1) {
1238 vec_nodes
.push_back(d_emptySingleton
);
1244 if(Trace
.isOn("regexp-debug")) {
1245 Trace("regexp-debug") << "Try CSET( " << cset
.size() << " ) = ";
1246 for(std::set
<unsigned>::const_iterator itr
= cset
.begin();
1247 itr
!= cset
.end(); itr
++) {
1248 Trace("regexp-debug") << *itr
<< ", ";
1250 Trace("regexp-debug") << std::endl
;
1252 for(std::set
<unsigned>::const_iterator itr
= cset
.begin();
1253 itr
!= cset
.end(); itr
++) {
1254 CVC4::String
c( CVC4::String::convertUnsignedIntToChar(*itr
) );
1255 if(!isPairNodesInSet(cache
[ *itr
], r1
, r2
)) {
1256 Node r1l
= derivativeSingle(r1
, c
);
1257 Node r2l
= derivativeSingle(r2
, c
);
1258 std::map
< unsigned, std::set
< PairNodes
> > cache2(cache
);
1259 PairNodes
p(r1l
, r2l
);
1260 cache2
[ *itr
].insert( p
);
1261 Node rt
= intersectInternal(r1l
, r2l
, cache2
, spflag
);
1264 return Node::null();
1266 rt
= Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
,
1267 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst(c
)), rt
) );
1268 vec_nodes
.push_back(rt
);
1271 rNode
= vec_nodes
.size()==0 ? d_emptyRegexp
: vec_nodes
.size()==1 ? vec_nodes
[0] :
1272 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vec_nodes
);
1273 rNode
= Rewriter::rewrite( rNode
);
1275 //TODO: non-empty var set
1279 d_inter_cache
[p
] = rNode
;
1281 Trace("regexp-intersect") << "INTERSECT( " << mkString(r1
) << ", " << mkString(r2
) << " ) = " << mkString(rNode
) << std::endl
;
1285 bool RegExpOpr::containC2(unsigned cnt
, Node n
) {
1286 if(n
.getKind() == kind::REGEXP_RV
) {
1287 unsigned y
= n
[0].getConst
<Rational
>().getNumerator().toUnsignedInt();
1289 } else if(n
.getKind() == kind::REGEXP_CONCAT
) {
1290 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1291 if(containC2(cnt
, n
[i
])) {
1295 } else if(n
.getKind() == kind::REGEXP_STAR
) {
1296 return containC2(cnt
, n
[0]);
1297 } else if(n
.getKind() == kind::REGEXP_UNION
) {
1298 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1299 if(containC2(cnt
, n
[i
])) {
1306 Node
RegExpOpr::convert1(unsigned cnt
, Node n
) {
1307 Trace("regexp-debug") << "Converting " << n
<< " at " << cnt
<< "... " << std::endl
;
1309 convert2(cnt
, n
, r1
, r2
);
1310 Trace("regexp-debug") << "... getting r1=" << r1
<< ", and r2=" << r2
<< std::endl
;
1311 Node ret
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
,
1312 NodeManager::currentNM()->mkNode(kind::REGEXP_STAR
, r1
), r2
);
1313 ret
= Rewriter::rewrite( ret
);
1314 Trace("regexp-debug") << "... done convert at " << cnt
<< ", with return " << ret
<< std::endl
;
1317 void RegExpOpr::convert2(unsigned cnt
, Node n
, Node
&r1
, Node
&r2
) {
1318 if(n
== d_emptyRegexp
) {
1321 } else if(n
== d_emptySingleton
) {
1322 r1
= d_emptySingleton
;
1323 r2
= d_emptySingleton
;
1324 } else if(n
.getKind() == kind::REGEXP_RV
) {
1325 unsigned y
= n
[0].getConst
<Rational
>().getNumerator().toUnsignedInt();
1326 r1
= d_emptySingleton
;
1332 } else if(n
.getKind() == kind::REGEXP_CONCAT
) {
1334 //convert2 x (r@(Seq l r1))
1335 // | contains x r1 = let (r2,r3) = convert2 x r1
1336 // in (Seq l r2, r3)
1337 // | otherwise = (Empty, r)
1339 std::vector
<Node
> vr1
, vr2
;
1340 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1341 if(containC2(cnt
, n
[i
])) {
1343 convert2(cnt
, n
[i
], t1
, t2
);
1345 r1
= vr1
.size()==0 ? d_emptyRegexp
: vr1
.size()==1 ? vr1
[0] :
1346 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vr1
);
1348 for( unsigned j
=i
+1; j
<n
.getNumChildren(); j
++ ) {
1349 vr2
.push_back(n
[j
]);
1351 r2
= vr2
.size()==0 ? d_emptyRegexp
: vr2
.size()==1 ? vr2
[0] :
1352 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vr2
);
1356 vr1
.push_back(n
[i
]);
1360 r1
= d_emptySingleton
;
1363 } else if(n
.getKind() == kind::REGEXP_UNION
) {
1364 std::vector
<Node
> vr1
, vr2
;
1365 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1367 convert2(cnt
, n
[i
], t1
, t2
);
1371 r1
= NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vr1
);
1372 r2
= NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vr2
);
1373 } else if(n
.getKind() == kind::STRING_TO_REGEXP
) {
1374 r1
= d_emptySingleton
;
1380 Node
RegExpOpr::intersectInternal2( Node r1
, Node r2
, std::map
< PairNodes
, Node
> cache
, bool &spflag
, unsigned cnt
) {
1381 Trace("regexp-intersect") << "Starting INTERSECT:\n "<< mkString(r1
) << ",\n " << mkString(r2
) << std::endl
;
1382 //if(Trace.isOn("regexp-debug")) {
1383 // Trace("regexp-debug") << "... with cache:\n";
1384 // for(std::map< PairNodes, Node >::const_iterator itr=cache.begin();
1385 // itr!=cache.end();itr++) {
1386 // Trace("regexp-debug") << "(" << itr->first.first << "," << itr->first.second << ")->" << itr->second << std::endl;
1391 return Node::null();
1393 std::pair
< Node
, Node
> p(r1
, r2
);
1394 std::map
< std::pair
< Node
, Node
>, Node
>::const_iterator itr
= d_inter_cache
.find(p
);
1396 if(itr
!= d_inter_cache
.end()) {
1397 rNode
= itr
->second
;
1399 if(r1
== d_emptyRegexp
|| r2
== d_emptyRegexp
) {
1400 rNode
= d_emptyRegexp
;
1401 } else if(r1
== d_emptySingleton
|| r2
== d_emptySingleton
) {
1403 int r
= delta((r1
== d_emptySingleton
? r2
: r1
), exp
);
1408 rNode
= d_emptySingleton
;
1410 rNode
= d_emptyRegexp
;
1412 } else if(r1
== r2
) {
1413 rNode
= convert1(cnt
, r1
);
1415 PairNodes
p(r1
, r2
);
1416 std::map
< PairNodes
, Node
>::const_iterator itrcache
= cache
.find(p
);
1417 if(itrcache
!= cache
.end()) {
1418 rNode
= itrcache
->second
;
1420 if(checkConstRegExp(r1
) && checkConstRegExp(r2
)) {
1421 std::vector
< unsigned > cset
;
1422 std::set
< unsigned > cset1
, cset2
;
1423 std::set
< Node
> vset1
, vset2
;
1424 firstChars(r1
, cset1
, vset1
);
1425 firstChars(r2
, cset2
, vset2
);
1426 std::set_intersection(cset1
.begin(), cset1
.end(), cset2
.begin(), cset1
.end(),
1427 std::inserter(cset
, cset
.begin()));
1428 std::vector
< Node
> vec_nodes
;
1430 int flag
= delta(r1
, delta_exp
);
1431 int flag2
= delta(r2
, delta_exp
);
1432 if(flag
!= 2 && flag2
!= 2) {
1433 if(flag
== 1 && flag2
== 1) {
1434 vec_nodes
.push_back(d_emptySingleton
);
1440 if(Trace
.isOn("regexp-debug")) {
1441 Trace("regexp-debug") << "Try CSET( " << cset
.size() << " ) = ";
1442 for(std::vector
<unsigned>::const_iterator itr
= cset
.begin();
1443 itr
!= cset
.end(); itr
++) {
1444 CVC4::String
c( CVC4::String::convertUnsignedIntToChar(*itr
) );
1445 Trace("regexp-debug") << c
<< ", ";
1447 Trace("regexp-debug") << std::endl
;
1449 for(std::vector
<unsigned>::const_iterator itr
= cset
.begin();
1450 itr
!= cset
.end(); itr
++) {
1451 CVC4::String
c( CVC4::String::convertUnsignedIntToChar(*itr
) );
1452 Node r1l
= derivativeSingle(r1
, c
);
1453 Node r2l
= derivativeSingle(r2
, c
);
1454 std::map
< PairNodes
, Node
> cache2(cache
);
1455 PairNodes
p(r1
, r2
);
1456 cache2
[ p
] = NodeManager::currentNM()->mkNode(kind::REGEXP_RV
, NodeManager::currentNM()->mkConst(CVC4::Rational(cnt
)));
1457 Node rt
= intersectInternal2(r1l
, r2l
, cache2
, spflag
, cnt
+1);
1458 rt
= convert1(cnt
, rt
);
1461 return Node::null();
1463 rt
= Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
,
1464 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst(c
)), rt
) );
1465 vec_nodes
.push_back(rt
);
1467 rNode
= vec_nodes
.size()==0 ? d_emptyRegexp
: vec_nodes
.size()==1 ? vec_nodes
[0] :
1468 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vec_nodes
);
1469 rNode
= Rewriter::rewrite( rNode
);
1471 //TODO: non-empty var set
1476 d_inter_cache
[p
] = rNode
;
1478 Trace("regexp-intersect") << "End of INTERSECT( " << mkString(r1
) << ", " << mkString(r2
) << " ) = " << mkString(rNode
) << std::endl
;
1481 Node
RegExpOpr::intersect(Node r1
, Node r2
, bool &spflag
) {
1482 //std::map< unsigned, std::set< PairNodes > > cache;
1483 std::map
< PairNodes
, Node
> cache
;
1484 if(checkConstRegExp(r1
) && checkConstRegExp(r2
)) {
1485 return intersectInternal2(r1
, r2
, cache
, spflag
, 1);
1488 return Node::null();
1492 Node
RegExpOpr::complement(Node r
, int &ret
) {
1495 if(d_compl_cache
.find(r
) != d_compl_cache
.end()) {
1496 rNode
= d_compl_cache
[r
].first
;
1497 ret
= d_compl_cache
[r
].second
;
1499 if(r
== d_emptyRegexp
) {
1500 rNode
= d_sigma_star
;
1501 } else if(r
== d_emptySingleton
) {
1502 rNode
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, d_sigma
, d_sigma_star
);
1503 } else if(!checkConstRegExp(r
)) {
1504 //TODO: var to be extended
1507 std::set
<unsigned> cset
;
1509 firstChars(r
, cset
, vset
);
1510 std::vector
< Node
> vec_nodes
;
1511 for(unsigned i
=0; i
<d_card
; i
++) {
1512 CVC4::String c
= CVC4::String::convertUnsignedIntToChar(i
);
1513 Node n
= NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst(c
));
1515 if(cset
.find(i
) == cset
.end()) {
1519 derivativeS(r
, c
, r2
);
1523 r2
= complement(r2
, rt
);
1526 n
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, n
, r2
));
1527 vec_nodes
.push_back(n
);
1529 rNode
= vec_nodes
.size()==0? d_emptyRegexp
: vec_nodes
.size()==1? vec_nodes
[0] :
1530 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vec_nodes
);
1532 rNode
= Rewriter::rewrite(rNode
);
1533 std::pair
< Node
, int > p(rNode
, ret
);
1534 d_compl_cache
[r
] = p
;
1536 Trace("regexp-compl") << "COMPL( " << mkString(r
) << " ) = " << mkString(rNode
) << ", ret=" << ret
<< std::endl
;
1540 void RegExpOpr::splitRegExp(Node r
, std::vector
< PairNodes
> &pset
) {
1541 Assert(checkConstRegExp(r
));
1542 if(d_split_cache
.find(r
) != d_split_cache
.end()) {
1543 pset
= d_split_cache
[r
];
1545 switch( r
.getKind() ) {
1546 case kind::REGEXP_EMPTY
: {
1549 case kind::REGEXP_OPT
: {
1550 PairNodes
tmp(d_emptySingleton
, d_emptySingleton
);
1551 pset
.push_back(tmp
);
1553 case kind::REGEXP_RANGE
:
1554 case kind::REGEXP_SIGMA
: {
1555 PairNodes
tmp1(d_emptySingleton
, r
);
1556 PairNodes
tmp2(r
, d_emptySingleton
);
1557 pset
.push_back(tmp1
);
1558 pset
.push_back(tmp2
);
1561 case kind::STRING_TO_REGEXP
: {
1562 Assert(r
[0].isConst());
1563 CVC4::String s
= r
[0].getConst
< CVC4::String
>();
1564 PairNodes
tmp1(d_emptySingleton
, r
);
1565 pset
.push_back(tmp1
);
1566 for(unsigned i
=1; i
<s
.size(); i
++) {
1567 CVC4::String s1
= s
.substr(0, i
);
1568 CVC4::String s2
= s
.substr(i
);
1569 Node n1
= NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst(s1
));
1570 Node n2
= NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst(s2
));
1571 PairNodes
tmp3(n1
, n2
);
1572 pset
.push_back(tmp3
);
1574 PairNodes
tmp2(r
, d_emptySingleton
);
1575 pset
.push_back(tmp2
);
1578 case kind::REGEXP_CONCAT
: {
1579 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
1580 std::vector
< PairNodes
> tset
;
1581 splitRegExp(r
[i
], tset
);
1582 std::vector
< Node
> hvec
;
1583 std::vector
< Node
> tvec
;
1584 for(unsigned j
=0; j
<=i
; j
++) {
1585 hvec
.push_back(r
[j
]);
1587 for(unsigned j
=i
; j
<r
.getNumChildren(); j
++) {
1588 tvec
.push_back(r
[j
]);
1590 for(unsigned j
=0; j
<tset
.size(); j
++) {
1591 hvec
[i
] = tset
[j
].first
;
1592 tvec
[0] = tset
[j
].second
;
1593 Node r1
= Rewriter::rewrite( hvec
.size()==1?hvec
[0]:NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, hvec
) );
1594 Node r2
= Rewriter::rewrite( tvec
.size()==1?tvec
[0]:NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, tvec
) );
1595 PairNodes
tmp2(r1
, r2
);
1596 pset
.push_back(tmp2
);
1601 case kind::REGEXP_UNION
: {
1602 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1603 std::vector
< PairNodes
> tset
;
1604 splitRegExp(r
[i
], tset
);
1605 pset
.insert(pset
.end(), tset
.begin(), tset
.end());
1609 case kind::REGEXP_INTER
: {
1610 bool spflag
= false;
1612 for(unsigned i
=1; i
<r
.getNumChildren(); i
++) {
1613 tmp
= intersect(tmp
, r
[i
], spflag
);
1615 splitRegExp(tmp
, pset
);
1618 case kind::REGEXP_STAR
: {
1619 std::vector
< PairNodes
> tset
;
1620 splitRegExp(r
[0], tset
);
1621 PairNodes
tmp1(d_emptySingleton
, d_emptySingleton
);
1622 pset
.push_back(tmp1
);
1623 for(unsigned i
=0; i
<tset
.size(); i
++) {
1624 Node r1
= tset
[i
].first
==d_emptySingleton
? r
: NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, r
, tset
[i
].first
);
1625 Node r2
= tset
[i
].second
==d_emptySingleton
? r
: NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, tset
[i
].second
, r
);
1626 PairNodes
tmp2(r1
, r2
);
1627 pset
.push_back(tmp2
);
1631 case kind::REGEXP_PLUS
: {
1632 std::vector
< PairNodes
> tset
;
1633 splitRegExp(r
[0], tset
);
1634 for(unsigned i
=0; i
<tset
.size(); i
++) {
1635 Node r1
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, r
, tset
[i
].first
);
1636 Node r2
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, tset
[i
].second
, r
);
1637 PairNodes
tmp2(r1
, r2
);
1638 pset
.push_back(tmp2
);
1643 Trace("strings-error") << "Unsupported term: " << r
<< " in splitRegExp." << std::endl
;
1645 //return Node::null();
1648 d_split_cache
[r
] = pset
;
1653 std::string
RegExpOpr::niceChar( Node r
) {
1655 std::string s
= r
.getConst
<CVC4::String
>().toString() ;
1656 return s
== "" ? "{E}" : ( s
== " " ? "{ }" : s
.size()>1? "("+s
+")" : s
);
1658 std::string ss
= "$" + r
.toString();
1662 std::string
RegExpOpr::mkString( Node r
) {
1667 int k
= r
.getKind();
1669 case kind::REGEXP_EMPTY
: {
1673 case kind::REGEXP_SIGMA
: {
1677 case kind::STRING_TO_REGEXP
: {
1678 retStr
+= niceChar( r
[0] );
1681 case kind::REGEXP_CONCAT
: {
1683 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1684 //if(i != 0) retStr += ".";
1685 retStr
+= mkString( r
[i
] );
1690 case kind::REGEXP_UNION
: {
1695 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1696 if(i
!= 0) retStr
+= "|";
1697 retStr
+= mkString( r
[i
] );
1703 case kind::REGEXP_INTER
: {
1705 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1706 if(i
!= 0) retStr
+= "&";
1707 retStr
+= mkString( r
[i
] );
1712 case kind::REGEXP_STAR
: {
1713 retStr
+= mkString( r
[0] );
1717 case kind::REGEXP_PLUS
: {
1718 retStr
+= mkString( r
[0] );
1722 case kind::REGEXP_OPT
: {
1723 retStr
+= mkString( r
[0] );
1727 case kind::REGEXP_RANGE
: {
1729 retStr
+= niceChar( r
[0] );
1731 retStr
+= niceChar( r
[1] );
1735 case kind::REGEXP_RV
: {
1737 retStr
+= r
[0].getConst
<Rational
>().getNumerator().toString();
1742 Trace("strings-error") << "Unsupported term: " << r
<< " in RegExp." << std::endl
;
1744 //return Node::null();
1751 }/* CVC4::theory::strings namespace */
1752 }/* CVC4::theory namespace */
1753 }/* CVC4 namespace */