0d422e823d5022e00fa7c2432edfdcea01118da8
1 /********************* */
2 /*! \file regexp_operation.cpp
4 ** Top contributors (to current version):
5 ** Tianyi Liang, Andrew Reynolds, Tim King
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
12 ** \brief Symbolic Regular Expresion Operations
14 ** Symbolic Regular Expresion Operations
17 #include "theory/strings/regexp_operation.h"
19 #include "expr/kind.h"
20 #include "options/strings_options.h"
21 #include "theory/strings/theory_strings_rewriter.h"
24 using namespace CVC4::kind
;
30 RegExpOpr::RegExpOpr()
31 : d_emptyString(NodeManager::currentNM()->mkConst(::CVC4::String(""))),
32 d_true(NodeManager::currentNM()->mkConst(true)),
33 d_false(NodeManager::currentNM()->mkConst(false)),
34 d_emptySingleton(NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
,
36 d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY
,
37 std::vector
<Node
>{})),
38 d_zero(NodeManager::currentNM()->mkConst(::CVC4::Rational(0))),
39 d_one(NodeManager::currentNM()->mkConst(::CVC4::Rational(1))),
40 d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA
,
41 std::vector
<Node
>{})),
42 d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR
, d_sigma
))
44 d_lastchar
= TheoryStringsRewriter::getAlphabetCardinality()-1;
47 RegExpOpr::~RegExpOpr() {}
49 bool RegExpOpr::checkConstRegExp( Node r
) {
50 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with /" << mkString( r
) << "/" << std::endl
;
52 if( d_cstre_cache
.find( r
) != d_cstre_cache
.end() ) {
53 ret
= d_cstre_cache
[r
];
55 if(r
.getKind() == kind::STRING_TO_REGEXP
) {
56 Node tmp
= Rewriter::rewrite( r
[0] );
59 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
60 if(!checkConstRegExp(r
[i
])) {
65 d_cstre_cache
[r
] = ret
;
70 // 0-unknown, 1-yes, 2-no
71 int RegExpOpr::delta( Node r
, Node
&exp
) {
72 Trace("regexp-delta") << "RegExp-Delta starts with /" << mkString( r
) << "/" << std::endl
;
74 if( d_delta_cache
.find( r
) != d_delta_cache
.end() ) {
75 ret
= d_delta_cache
[r
].first
;
76 exp
= d_delta_cache
[r
].second
;
80 case kind::REGEXP_EMPTY
: {
84 case kind::REGEXP_SIGMA
: {
88 case kind::STRING_TO_REGEXP
: {
89 Node tmp
= Rewriter::rewrite(r
[0]);
91 if(tmp
== d_emptyString
) {
98 if(tmp
.getKind() == kind::STRING_CONCAT
) {
99 for(unsigned i
=0; i
<tmp
.getNumChildren(); i
++) {
100 if(tmp
[i
].isConst()) {
107 exp
= r
[0].eqNode(d_emptyString
);
112 case kind::REGEXP_CONCAT
: {
114 std::vector
< Node
> vec_nodes
;
115 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
117 int tmp
= delta( r
[i
], exp2
);
121 } else if(tmp
== 0) {
122 vec_nodes
.push_back( exp2
);
130 exp
= vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode(kind::AND
, vec_nodes
);
135 case kind::REGEXP_UNION
: {
137 std::vector
< Node
> vec_nodes
;
138 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
140 int tmp
= delta( r
[i
], exp2
);
144 } else if(tmp
== 0) {
145 vec_nodes
.push_back( exp2
);
153 exp
= vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode(kind::OR
, vec_nodes
);
158 case kind::REGEXP_INTER
: {
160 std::vector
< Node
> vec_nodes
;
161 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
163 int tmp
= delta( r
[i
], exp2
);
167 } else if(tmp
== 0) {
168 vec_nodes
.push_back( exp2
);
176 exp
= vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode(kind::AND
, vec_nodes
);
181 case kind::REGEXP_STAR
: {
185 case kind::REGEXP_PLUS
: {
186 ret
= delta( r
[0], exp
);
189 case kind::REGEXP_OPT
: {
193 case kind::REGEXP_RANGE
: {
197 case kind::REGEXP_LOOP
: {
201 ret
= delta(r
[0], exp
);
206 //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
211 exp
= Rewriter::rewrite(exp
);
213 std::pair
< int, Node
> p(ret
, exp
);
214 d_delta_cache
[r
] = p
;
216 Trace("regexp-delta") << "RegExp-Delta returns : " << ret
<< std::endl
;
220 // 0-unknown, 1-yes, 2-no
221 int RegExpOpr::derivativeS( Node r
, CVC4::String c
, Node
&retNode
) {
222 Assert( c
.size() < 2 );
223 Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r
) << "/, c=" << c
<< std::endl
;
226 retNode
= d_emptyRegexp
;
228 PairNodeStr dv
= std::make_pair( r
, c
);
229 if( d_deriv_cache
.find( dv
) != d_deriv_cache
.end() ) {
230 retNode
= d_deriv_cache
[dv
].first
;
231 ret
= d_deriv_cache
[dv
].second
;
232 } else if( c
.isEmptyString() ) {
234 ret
= delta( r
, expNode
);
236 retNode
= NodeManager::currentNM()->mkNode(kind::ITE
, expNode
, r
, d_emptyRegexp
);
237 } else if(ret
== 1) {
240 std::pair
< Node
, int > p(retNode
, ret
);
241 d_deriv_cache
[dv
] = p
;
243 switch( r
.getKind() ) {
244 case kind::REGEXP_EMPTY
: {
248 case kind::REGEXP_SIGMA
: {
249 retNode
= d_emptySingleton
;
252 case kind::REGEXP_RANGE
: {
253 CVC4::String a
= r
[0].getConst
<String
>();
254 CVC4::String b
= r
[1].getConst
<String
>();
255 retNode
= (a
<= c
&& c
<= b
) ? d_emptySingleton
: d_emptyRegexp
;
258 case kind::STRING_TO_REGEXP
: {
259 Node tmp
= Rewriter::rewrite(r
[0]);
261 if(tmp
== d_emptyString
) {
264 if (tmp
.getConst
<CVC4::String
>().front() == c
.front())
266 retNode
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
,
267 tmp
.getConst
< CVC4::String
>().size() == 1 ? d_emptyString
: NodeManager::currentNM()->mkConst( tmp
.getConst
< CVC4::String
>().substr(1) ) );
275 if(tmp
.getKind() == kind::STRING_CONCAT
) {
278 if (t2
.getConst
<CVC4::String
>().front() == c
.front())
280 Node n
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
,
281 tmp
.getConst
< CVC4::String
>().size() == 1 ? d_emptyString
: NodeManager::currentNM()->mkConst( tmp
.getConst
< CVC4::String
>().substr(1) ) );
282 std::vector
< Node
> vec_nodes
;
283 vec_nodes
.push_back(n
);
284 for(unsigned i
=1; i
<tmp
.getNumChildren(); i
++) {
285 vec_nodes
.push_back(tmp
[i
]);
287 retNode
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vec_nodes
);
294 std::vector
< Node
> vec_nodes
;
295 for(unsigned i
=1; i
<tmp
.getNumChildren(); i
++) {
296 vec_nodes
.push_back(tmp
[i
]);
298 rest
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vec_nodes
);
302 Node sk
= NodeManager::currentNM()->mkSkolem( "rsp", NodeManager::currentNM()->stringType(), "Split RegExp" );
303 retNode
= NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, sk
);
305 retNode
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, retNode
, rest
));
307 Node exp
= tmp
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT
,
308 NodeManager::currentNM()->mkConst(c
), sk
));
309 retNode
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE
, exp
, retNode
, d_emptyRegexp
));
314 case kind::REGEXP_CONCAT
: {
315 std::vector
< Node
> vec_nodes
;
316 std::vector
< Node
> delta_nodes
;
318 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
321 int rt
= derivativeS(r
[i
], c
, dc
);
326 std::vector
< Node
> vec_nodes2
;
327 if(dc
!= d_emptySingleton
) {
328 vec_nodes2
.push_back( dc
);
330 for(unsigned j
=i
+1; j
<r
.getNumChildren(); ++j
) {
331 if(r
[j
] != d_emptySingleton
) {
332 vec_nodes2
.push_back( r
[j
] );
335 Node tmp
= vec_nodes2
.size()==0 ? d_emptySingleton
:
336 vec_nodes2
.size()==1 ? vec_nodes2
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, vec_nodes2
);
337 if(dnode
!= d_true
) {
338 tmp
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE
, dnode
, tmp
, d_emptyRegexp
));
341 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), tmp
) == vec_nodes
.end()) {
342 vec_nodes
.push_back( tmp
);
346 int rt2
= delta( r
[i
], exp3
);
348 dnode
= Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND
, dnode
, exp3
));
349 } else if( rt2
== 2 ) {
353 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
354 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
355 if(retNode
== d_emptyRegexp
) {
360 case kind::REGEXP_UNION
: {
361 std::vector
< Node
> vec_nodes
;
362 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
364 int rt
= derivativeS(r
[i
], c
, dc
);
369 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
370 vec_nodes
.push_back( dc
);
373 //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
375 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
376 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
377 if(retNode
== d_emptyRegexp
) {
382 case kind::REGEXP_INTER
: {
384 bool flag_sg
= false;
385 std::vector
< Node
> vec_nodes
;
386 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
388 int rt
= derivativeS(r
[i
], c
, dc
);
395 if(dc
== d_sigma_star
) {
398 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
399 vec_nodes
.push_back( dc
);
404 if(vec_nodes
.size() == 0 && flag_sg
) {
405 retNode
= d_sigma_star
;
407 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
408 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER
, vec_nodes
) );
409 if(retNode
== d_emptyRegexp
) {
414 retNode
= d_emptyRegexp
;
419 case kind::REGEXP_STAR
: {
421 ret
= derivativeS(r
[0], c
, dc
);
422 retNode
= dc
==d_emptyRegexp
? dc
: (dc
==d_emptySingleton
? r
: NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, dc
, r
));
425 case kind::REGEXP_LOOP
: {
426 if(r
[1] == r
[2] && r
[1] == d_zero
) {
428 //retNode = d_emptyRegexp;
431 ret
= derivativeS(r
[0], c
, dc
);
432 if(dc
==d_emptyRegexp
) {
433 unsigned l
= r
[1].getConst
<Rational
>().getNumerator().toUnsignedInt();
434 unsigned u
= r
[2].getConst
<Rational
>().getNumerator().toUnsignedInt();
435 Node r2
= NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP
, r
[0],
436 NodeManager::currentNM()->mkConst(CVC4::Rational(l
==0? 0 : (l
-1))),
437 NodeManager::currentNM()->mkConst(CVC4::Rational(u
-1)));
438 retNode
= dc
==d_emptySingleton
? r2
: NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, dc
, r2
);
440 retNode
= d_emptyRegexp
;
446 //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
450 if(retNode
!= d_emptyRegexp
) {
451 retNode
= Rewriter::rewrite( retNode
);
453 std::pair
< Node
, int > p(retNode
, ret
);
454 d_deriv_cache
[dv
] = p
;
457 Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode
) << "/" << std::endl
;
461 Node
RegExpOpr::derivativeSingle( Node r
, CVC4::String c
) {
462 Assert( c
.size() < 2 );
463 Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r
) << "/, c=" << c
<< std::endl
;
464 Node retNode
= d_emptyRegexp
;
465 PairNodeStr dv
= std::make_pair( r
, c
);
466 if( d_dv_cache
.find( dv
) != d_dv_cache
.end() ) {
467 retNode
= d_dv_cache
[dv
];
468 } else if( c
.isEmptyString() ){
470 int tmp
= delta( r
, exp
);
473 retNode
= d_emptyRegexp
;
474 } else if(tmp
== 1) {
477 retNode
= d_emptyRegexp
;
482 case kind::REGEXP_EMPTY
: {
483 retNode
= d_emptyRegexp
;
486 case kind::REGEXP_SIGMA
: {
487 retNode
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, d_emptyString
);
490 case kind::REGEXP_RANGE
: {
491 CVC4::String a
= r
[0].getConst
<String
>();
492 CVC4::String b
= r
[1].getConst
<String
>();
493 retNode
= (a
<= c
&& c
<= b
) ? d_emptySingleton
: d_emptyRegexp
;
496 case kind::STRING_TO_REGEXP
: {
498 if(r
[0] == d_emptyString
) {
499 retNode
= d_emptyRegexp
;
501 if (r
[0].getConst
<CVC4::String
>().front() == c
.front())
503 retNode
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
,
504 r
[0].getConst
< CVC4::String
>().size() == 1 ? d_emptyString
: NodeManager::currentNM()->mkConst( r
[0].getConst
< CVC4::String
>().substr(1) ) );
506 retNode
= d_emptyRegexp
;
511 retNode
= d_emptyRegexp
;
515 case kind::REGEXP_CONCAT
: {
516 Node rees
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, d_emptyString
);
517 std::vector
< Node
> vec_nodes
;
518 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
519 Node dc
= derivativeSingle(r
[i
], c
);
520 if(dc
!= d_emptyRegexp
) {
521 std::vector
< Node
> vec_nodes2
;
523 vec_nodes2
.push_back( dc
);
525 for(unsigned j
=i
+1; j
<r
.getNumChildren(); ++j
) {
527 vec_nodes2
.push_back( r
[j
] );
530 Node tmp
= vec_nodes2
.size()==0 ? rees
:
531 vec_nodes2
.size()==1 ? vec_nodes2
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, vec_nodes2
);
532 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), tmp
) == vec_nodes
.end()) {
533 vec_nodes
.push_back( tmp
);
537 if( delta( r
[i
], exp
) != 1 ) {
541 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
542 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
545 case kind::REGEXP_UNION
: {
546 std::vector
< Node
> vec_nodes
;
547 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
548 Node dc
= derivativeSingle(r
[i
], c
);
549 if(dc
!= d_emptyRegexp
) {
550 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
551 vec_nodes
.push_back( dc
);
554 //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl;
556 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
557 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION
, vec_nodes
) );
560 case kind::REGEXP_INTER
: {
562 bool flag_sg
= false;
563 std::vector
< Node
> vec_nodes
;
564 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
565 Node dc
= derivativeSingle(r
[i
], c
);
566 if(dc
!= d_emptyRegexp
) {
567 if(dc
== d_sigma_star
) {
570 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
571 vec_nodes
.push_back( dc
);
580 if(vec_nodes
.size() == 0 && flag_sg
) {
581 retNode
= d_sigma_star
;
583 retNode
= vec_nodes
.size() == 0 ? d_emptyRegexp
:
584 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER
, vec_nodes
) );
587 retNode
= d_emptyRegexp
;
591 case kind::REGEXP_STAR
: {
592 Node dc
= derivativeSingle(r
[0], c
);
593 if(dc
!= d_emptyRegexp
) {
594 retNode
= dc
==d_emptySingleton
? r
: NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, dc
, r
);
596 retNode
= d_emptyRegexp
;
600 case kind::REGEXP_LOOP
: {
601 if(r
[1] == r
[2] && r
[1] == d_zero
) {
602 retNode
= d_emptyRegexp
;
604 Node dc
= derivativeSingle(r
[0], c
);
605 if(dc
!= d_emptyRegexp
) {
606 unsigned l
= r
[1].getConst
<Rational
>().getNumerator().toUnsignedInt();
607 unsigned u
= r
[2].getConst
<Rational
>().getNumerator().toUnsignedInt();
608 Node r2
= NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP
, r
[0],
609 NodeManager::currentNM()->mkConst(CVC4::Rational(l
==0? 0 : (l
-1))),
610 NodeManager::currentNM()->mkConst(CVC4::Rational(u
-1)));
611 retNode
= dc
==d_emptySingleton
? r2
: NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, dc
, r2
);
613 retNode
= d_emptyRegexp
;
616 //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl;
620 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in derivative of RegExp." << std::endl
;
624 if(retNode
!= d_emptyRegexp
) {
625 retNode
= Rewriter::rewrite( retNode
);
627 d_dv_cache
[dv
] = retNode
;
629 Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode
) << "/" << std::endl
;
633 void RegExpOpr::firstChars(Node r
, std::set
<unsigned> &pcset
, SetNodes
&pvset
)
635 Trace("regexp-fset") << "Start FSET(" << mkString(r
) << ")" << std::endl
;
636 std::map
<Node
, std::pair
<std::set
<unsigned>, SetNodes
> >::const_iterator itr
=
637 d_fset_cache
.find(r
);
638 if(itr
!= d_fset_cache
.end()) {
639 pcset
.insert((itr
->second
).first
.begin(), (itr
->second
).first
.end());
640 pvset
.insert((itr
->second
).second
.begin(), (itr
->second
).second
.end());
642 // cset is code points
643 std::set
<unsigned> cset
;
647 case kind::REGEXP_EMPTY
: {
650 case kind::REGEXP_SIGMA
: {
651 Assert(d_lastchar
< std::numeric_limits
<unsigned>::max());
652 for (unsigned i
= 0; i
<= d_lastchar
; i
++)
658 case kind::REGEXP_RANGE
: {
659 unsigned a
= r
[0].getConst
<String
>().front();
660 a
= String::convertUnsignedIntToCode(a
);
661 unsigned b
= r
[1].getConst
<String
>().front();
662 b
= String::convertUnsignedIntToCode(b
);
664 Assert(b
< std::numeric_limits
<unsigned>::max());
665 for (unsigned c
= a
; c
<= b
; c
++)
671 case kind::STRING_TO_REGEXP
: {
672 Node st
= Rewriter::rewrite(r
[0]);
674 CVC4::String s
= st
.getConst
< CVC4::String
>();
676 unsigned sc
= s
.front();
677 sc
= String::convertUnsignedIntToCode(sc
);
681 else if (st
.getKind() == kind::STRING_CONCAT
)
683 if(st
[0].isConst()) {
684 CVC4::String s
= st
[0].getConst
<CVC4::String
>();
685 unsigned sc
= s
.front();
686 sc
= String::convertUnsignedIntToCode(sc
);
689 vset
.insert( st
[0] );
698 case kind::REGEXP_CONCAT
: {
699 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
700 firstChars(r
[i
], cset
, vset
);
703 int r
= delta( n
, exp
);
710 case kind::REGEXP_UNION
: {
711 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
712 firstChars(r
[i
], cset
, vset
);
716 case kind::REGEXP_INTER
: {
717 //TODO: Overapproximation for now
718 //for(unsigned i=0; i<r.getNumChildren(); i++) {
719 // firstChars(r[i], cset, vset);
721 firstChars(r
[0], cset
, vset
);
724 case kind::REGEXP_STAR
: {
725 firstChars(r
[0], cset
, vset
);
728 case kind::REGEXP_LOOP
: {
729 firstChars(r
[0], cset
, vset
);
733 Trace("regexp-error") << "Unsupported term: " << r
<< " in firstChars." << std::endl
;
737 pcset
.insert(cset
.begin(), cset
.end());
738 pvset
.insert(vset
.begin(), vset
.end());
739 std::pair
<std::set
<unsigned>, SetNodes
> p(cset
, vset
);
743 if(Trace
.isOn("regexp-fset")) {
744 Trace("regexp-fset") << "END FSET(" << mkString(r
) << ") = {";
745 for (std::set
<unsigned>::const_iterator itr
= pcset
.begin();
749 if (itr
!= pcset
.begin())
751 Trace("regexp-fset") << ",";
753 Trace("regexp-fset") << (*itr
);
755 Trace("regexp-fset") << "}" << std::endl
;
760 void RegExpOpr::simplify(Node t
, std::vector
< Node
> &new_nodes
, bool polarity
) {
761 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t
<< ", polarity=" << polarity
<< std::endl
;
762 Assert(t
.getKind() == kind::STRING_IN_REGEXP
);
763 Node str
= Rewriter::rewrite(t
[0]);
764 Node re
= Rewriter::rewrite(t
[1]);
766 simplifyPRegExp( str
, re
, new_nodes
);
768 simplifyNRegExp( str
, re
, new_nodes
);
770 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes
.size() << "):\n";
771 for(unsigned i
=0; i
<new_nodes
.size(); i
++) {
772 Trace("strings-regexp-simpl") << "\t" << new_nodes
[i
] << std::endl
;
775 void RegExpOpr::simplifyNRegExp( Node s
, Node r
, std::vector
< Node
> &new_nodes
) {
776 std::pair
< Node
, Node
> p(s
, r
);
777 NodeManager
*nm
= NodeManager::currentNM();
778 std::map
< std::pair
< Node
, Node
>, Node
>::const_iterator itr
= d_simpl_neg_cache
.find(p
);
779 if(itr
!= d_simpl_neg_cache
.end()) {
780 new_nodes
.push_back( itr
->second
);
785 case kind::REGEXP_EMPTY
: {
789 case kind::REGEXP_SIGMA
: {
790 conc
= d_one
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
)).negate();
793 case kind::REGEXP_RANGE
: {
794 std::vector
< Node
> vec
;
795 unsigned a
= r
[0].getConst
<String
>().front();
796 a
= String::convertUnsignedIntToCode(a
);
797 unsigned b
= r
[1].getConst
<String
>().front();
798 b
= String::convertUnsignedIntToCode(b
);
799 for (unsigned c
= a
; c
<= b
; c
++)
801 std::vector
<unsigned> tmpVec
;
802 tmpVec
.push_back(String::convertCodeToUnsignedInt(c
));
803 Node tmp
= s
.eqNode(nm
->mkConst(String(tmpVec
))).negate();
804 vec
.push_back( tmp
);
806 conc
= vec
.size()==1? vec
[0] : NodeManager::currentNM()->mkNode(kind::AND
, vec
);
809 case kind::STRING_TO_REGEXP
: {
810 conc
= s
.eqNode(r
[0]).negate();
813 case kind::REGEXP_CONCAT
: {
814 // The following simplification states that
815 // ~( s in R1 ++ R2 )
818 // 0 <= x <= len(s) =>
819 // ~( substr(s,0,x) in R1 ) OR ~( substr(s,x,len(s)-x) in R2)
820 Node lens
= nm
->mkNode(STRING_LENGTH
, s
);
821 // the index we are removing from the RE concatenation
822 unsigned indexRm
= 0;
825 // As an optimization to the above reduction, if we can determine that
826 // all strings in the language of R1 have the same length, say n,
827 // then the conclusion of the reduction is quantifier-free:
828 // ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
829 Node reLength
= TheoryStringsRewriter::getFixedLengthForRegexp(r
[0]);
830 if (reLength
.isNull())
832 // try from the opposite end
833 unsigned indexE
= r
.getNumChildren() - 1;
834 reLength
= TheoryStringsRewriter::getFixedLengthForRegexp(r
[indexE
]);
835 if (!reLength
.isNull())
841 if (reLength
.isNull())
843 b1
= nm
->mkBoundVar(nm
->integerType());
844 b1v
= nm
->mkNode(BOUND_VAR_LIST
, b1
);
845 guard
= nm
->mkNode(AND
,
846 nm
->mkNode(GEQ
, b1
, d_zero
),
847 nm
->mkNode(GEQ
, nm
->mkNode(STRING_LENGTH
, s
), b1
));
857 s1
= nm
->mkNode(STRING_SUBSTR
, s
, d_zero
, b1
);
858 s2
= nm
->mkNode(STRING_SUBSTR
, s
, b1
, nm
->mkNode(MINUS
, lens
, b1
));
862 s1
= nm
->mkNode(STRING_SUBSTR
, s
, nm
->mkNode(MINUS
, lens
, b1
), b1
);
864 nm
->mkNode(STRING_SUBSTR
, s
, d_zero
, nm
->mkNode(MINUS
, lens
, b1
));
866 Node s1r1
= nm
->mkNode(STRING_IN_REGEXP
, s1
, r
[indexRm
]).negate();
867 std::vector
<Node
> nvec
;
868 for (unsigned i
= 0, nchild
= r
.getNumChildren(); i
< nchild
; i
++)
872 nvec
.push_back( r
[i
] );
875 Node r2
= nvec
.size() == 1 ? nvec
[0] : nm
->mkNode(REGEXP_CONCAT
, nvec
);
876 r2
= Rewriter::rewrite(r2
);
877 Node s2r2
= nm
->mkNode(STRING_IN_REGEXP
, s2
, r2
).negate();
878 conc
= nm
->mkNode(OR
, s1r1
, s2r2
);
881 conc
= nm
->mkNode(OR
, guard
.negate(), conc
);
882 conc
= nm
->mkNode(FORALL
, b1v
, conc
);
886 case kind::REGEXP_UNION
: {
887 std::vector
< Node
> c_and
;
888 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
889 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
890 c_and
.push_back( r
[i
][0].eqNode(s
).negate() );
891 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
894 c_and
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]).negate());
897 conc
= c_and
.size() == 0 ? d_true
:
898 c_and
.size() == 1 ? c_and
[0] : NodeManager::currentNM()->mkNode(kind::AND
, c_and
);
901 case kind::REGEXP_INTER
: {
902 bool emptyflag
= false;
903 std::vector
< Node
> c_or
;
904 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
905 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
906 c_or
.push_back( r
[i
][0].eqNode(s
).negate() );
907 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
911 c_or
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]).negate());
917 conc
= c_or
.size() == 1 ? c_or
[0] : NodeManager::currentNM()->mkNode(kind::OR
, c_or
);
921 case kind::REGEXP_STAR
: {
922 if(s
== d_emptyString
) {
924 } else if(r
[0].getKind() == kind::REGEXP_EMPTY
) {
925 conc
= s
.eqNode(d_emptyString
).negate();
926 } else if(r
[0].getKind() == kind::REGEXP_SIGMA
) {
929 Node lens
= NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
);
930 Node sne
= s
.eqNode(d_emptyString
).negate();
931 Node b1
= NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
932 Node b1v
= NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST
, b1
);
933 Node g1
= NodeManager::currentNM()->mkNode( kind::AND
, NodeManager::currentNM()->mkNode(kind::GEQ
, b1
, d_one
),
934 NodeManager::currentNM()->mkNode( kind::GEQ
, lens
, b1
) );
936 Node s1
= NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR
, s
, d_zero
, b1
);
937 Node s2
= NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR
, s
, b1
, NodeManager::currentNM()->mkNode(kind::MINUS
, lens
, b1
));
938 Node s1r1
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s1
, r
[0]).negate();
939 Node s2r2
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s2
, r
).negate();
941 conc
= NodeManager::currentNM()->mkNode(kind::OR
, s1r1
, s2r2
);
942 conc
= NodeManager::currentNM()->mkNode(kind::IMPLIES
, g1
, conc
);
943 conc
= NodeManager::currentNM()->mkNode(kind::FORALL
, b1v
, conc
);
944 conc
= NodeManager::currentNM()->mkNode(kind::AND
, sne
, conc
);
948 case kind::REGEXP_LOOP
: {
949 Assert(r
.getNumChildren() == 3);
952 conc
= s
.eqNode(d_emptyString
).negate();
953 } else if(r
[1] == d_one
) {
954 conc
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[0]).negate();
957 unsigned l
= r
[1].getConst
<Rational
>().getNumerator().toUnsignedInt();
958 std::vector
<Node
> vec
;
959 for(unsigned i
=0; i
<l
; i
++) {
962 Node r2
= NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vec
);
963 conc
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r2
).negate();
966 Assert(r
[1] == d_zero
);
968 unsigned u
= r
[2].getConst
<Rational
>().getNumerator().toUnsignedInt();
969 std::vector
<Node
> vec
;
970 vec
.push_back(d_emptySingleton
);
971 std::vector
<Node
> vec2
;
972 for(unsigned i
=1; i
<=u
; i
++) {
973 vec2
.push_back(r
[0]);
974 Node r2
= i
==1? r
[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vec
);
977 Node r3
= NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vec
);
978 conc
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r3
).negate();
983 Trace("strings-error") << "Unsupported term: " << r
<< " in simplifyNRegExp." << std::endl
;
984 Assert( false, "Unsupported Term" );
987 conc
= Rewriter::rewrite( conc
);
988 new_nodes
.push_back( conc
);
989 d_simpl_neg_cache
[p
] = conc
;
992 void RegExpOpr::simplifyPRegExp( Node s
, Node r
, std::vector
< Node
> &new_nodes
) {
993 std::pair
< Node
, Node
> p(s
, r
);
994 NodeManager
*nm
= NodeManager::currentNM();
995 std::map
< std::pair
< Node
, Node
>, Node
>::const_iterator itr
= d_simpl_cache
.find(p
);
996 if(itr
!= d_simpl_cache
.end()) {
997 new_nodes
.push_back( itr
->second
);
1002 case kind::REGEXP_EMPTY
: {
1006 case kind::REGEXP_SIGMA
: {
1007 conc
= d_one
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH
, s
));
1010 case kind::REGEXP_RANGE
: {
1011 conc
= s
.eqNode( r
[0] );
1013 unsigned a
= r
[0].getConst
<String
>().front();
1014 unsigned b
= r
[1].getConst
<String
>().front();
1016 std::vector
<unsigned> anvec
;
1018 Node an
= nm
->mkConst(String(anvec
));
1020 ? nm
->mkNode(kind::STRING_IN_REGEXP
,
1022 nm
->mkNode(kind::REGEXP_RANGE
, an
, r
[1]))
1024 conc
= NodeManager::currentNM()->mkNode(kind::OR
, conc
, tmp
);
1028 case kind::STRING_TO_REGEXP
: {
1029 conc
= s
.eqNode(r
[0]);
1032 case kind::REGEXP_CONCAT
: {
1033 std::vector
< Node
> nvec
;
1034 std::vector
< Node
> cc
;
1035 bool emptyflag
= false;
1036 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1037 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
1038 cc
.push_back( r
[i
][0] );
1039 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
1043 Node sk
= NodeManager::currentNM()->mkSkolem( "rc", s
.getType(), "created for regular expression concat" );
1044 Node lem
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk
, r
[i
]);
1045 nvec
.push_back(lem
);
1052 Node lem
= s
.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT
, cc
) );
1053 nvec
.push_back(lem
);
1054 conc
= nvec
.size() == 1 ? nvec
[0] : NodeManager::currentNM()->mkNode(kind::AND
, nvec
);
1058 case kind::REGEXP_UNION
: {
1059 std::vector
< Node
> c_or
;
1060 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1061 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
1062 c_or
.push_back( r
[i
][0].eqNode(s
) );
1063 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
1066 c_or
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]));
1069 conc
= c_or
.size() == 0 ? d_false
:
1070 c_or
.size() == 1 ? c_or
[0] : NodeManager::currentNM()->mkNode(kind::OR
, c_or
);
1073 case kind::REGEXP_INTER
: {
1074 std::vector
< Node
> c_and
;
1075 bool emptyflag
= false;
1076 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1077 if(r
[i
].getKind() == kind::STRING_TO_REGEXP
) {
1078 c_and
.push_back( r
[i
][0].eqNode(s
) );
1079 } else if(r
[i
].getKind() == kind::REGEXP_EMPTY
) {
1083 c_and
.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[i
]));
1089 conc
= c_and
.size() == 1 ? c_and
[0] : NodeManager::currentNM()->mkNode(kind::AND
, c_and
);
1093 case kind::REGEXP_STAR
: {
1094 if(s
== d_emptyString
) {
1096 } else if(r
[0].getKind() == kind::REGEXP_EMPTY
) {
1097 conc
= s
.eqNode(d_emptyString
);
1098 } else if(r
[0].getKind() == kind::REGEXP_SIGMA
) {
1101 Node se
= s
.eqNode(d_emptyString
);
1102 Node sinr
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, s
, r
[0]);
1103 Node sk1
= NodeManager::currentNM()->mkSkolem( "rs", s
.getType(), "created for regular expression star" );
1104 Node sk2
= NodeManager::currentNM()->mkSkolem( "rs", s
.getType(), "created for regular expression star" );
1105 Node s1nz
= sk1
.eqNode(d_emptyString
).negate();
1106 Node s2nz
= sk2
.eqNode(d_emptyString
).negate();
1107 Node s1inr
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk1
, r
[0]);
1108 Node s2inrs
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk2
, r
);
1109 Node s12
= s
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT
, sk1
, sk2
));
1111 conc
= NodeManager::currentNM()->mkNode(kind::AND
, s12
, s1nz
, s2nz
, s1inr
, s2inrs
);
1112 conc
= NodeManager::currentNM()->mkNode(kind::OR
, se
, sinr
, conc
);
1116 case kind::REGEXP_LOOP
: {
1117 Assert(r
.getNumChildren() == 3);
1118 if(r
[1] == d_zero
) {
1119 if(r
[2] == d_zero
) {
1120 conc
= s
.eqNode( d_emptyString
);
1123 if(s
!= d_emptyString
) {
1124 Node sk1
= NodeManager::currentNM()->mkSkolem( "lps", s
.getType(), "created for regular expression loop" );
1125 Node sk2
= NodeManager::currentNM()->mkSkolem( "lps", s
.getType(), "created for regular expression loop" );
1126 Node seq12
= s
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT
, sk1
, sk2
));
1127 Node sk1ne
= sk1
.eqNode(d_emptyString
).negate();
1128 Node sk1inr
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk1
, r
[0]);
1129 unsigned u
= r
[2].getConst
<Rational
>().getNumerator().toUnsignedInt();
1130 Node u1
= NodeManager::currentNM()->mkConst(CVC4::Rational(u
- 1));
1131 Node sk2inru
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk2
,
1132 NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP
, r
[0], d_zero
, u1
));
1133 conc
= NodeManager::currentNM()->mkNode(kind::AND
, seq12
, sk1ne
, sk1inr
, sk2inru
);
1134 conc
= NodeManager::currentNM()->mkNode(kind::OR
,
1135 s
.eqNode(d_emptyString
), conc
);
1142 Node sk1
= NodeManager::currentNM()->mkSkolem( "lps", s
.getType(), "created for regular expression loop" );
1143 Node sk2
= NodeManager::currentNM()->mkSkolem( "lps", s
.getType(), "created for regular expression loop" );
1144 Node seq12
= s
.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT
, sk1
, sk2
));
1145 Node sk1ne
= sk1
.eqNode(d_emptyString
).negate();
1146 Node sk1inr
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk1
, r
[0]);
1147 unsigned u
= r
[2].getConst
<Rational
>().getNumerator().toUnsignedInt();
1148 Node u1
= NodeManager::currentNM()->mkConst(CVC4::Rational(u
- 1));
1149 Node sk2inru
= NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP
, sk2
,
1150 NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP
, r
[0], u1
, u1
));
1151 conc
= NodeManager::currentNM()->mkNode(kind::AND
, seq12
, sk1ne
, sk1inr
, sk2inru
);
1156 Trace("strings-error") << "Unsupported term: " << r
<< " in simplifyPRegExp." << std::endl
;
1157 Assert( false, "Unsupported Term" );
1160 conc
= Rewriter::rewrite( conc
);
1161 new_nodes
.push_back( conc
);
1162 d_simpl_cache
[p
] = conc
;
1166 bool RegExpOpr::isPairNodesInSet(std::set
< PairNodes
> &s
, Node n1
, Node n2
) {
1167 for(std::set
< PairNodes
>::const_iterator itr
= s
.begin();
1168 itr
!= s
.end(); ++itr
) {
1169 if((itr
->first
== n1
&& itr
->second
== n2
) ||
1170 (itr
->first
== n2
&& itr
->second
== n1
)) {
1177 bool RegExpOpr::containC2(unsigned cnt
, Node n
) {
1178 if(n
.getKind() == kind::REGEXP_RV
) {
1179 Assert(n
[0].getConst
<Rational
>() <= Rational(String::maxSize()),
1180 "Exceeded UINT32_MAX in RegExpOpr::containC2");
1181 unsigned y
= n
[0].getConst
<Rational
>().getNumerator().toUnsignedInt();
1183 } else if(n
.getKind() == kind::REGEXP_CONCAT
) {
1184 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1185 if(containC2(cnt
, n
[i
])) {
1189 } else if(n
.getKind() == kind::REGEXP_STAR
) {
1190 return containC2(cnt
, n
[0]);
1191 } else if(n
.getKind() == kind::REGEXP_LOOP
) {
1192 return containC2(cnt
, n
[0]);
1193 } else if(n
.getKind() == kind::REGEXP_UNION
) {
1194 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1195 if(containC2(cnt
, n
[i
])) {
1202 Node
RegExpOpr::convert1(unsigned cnt
, Node n
) {
1203 Trace("regexp-debug") << "Converting " << n
<< " at " << cnt
<< "... " << std::endl
;
1205 convert2(cnt
, n
, r1
, r2
);
1206 Trace("regexp-debug") << "... getting r1=" << r1
<< ", and r2=" << r2
<< std::endl
;
1207 Node ret
= r1
==d_emptySingleton
? r2
: NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
,
1208 NodeManager::currentNM()->mkNode(kind::REGEXP_STAR
, r1
), r2
);
1209 ret
= Rewriter::rewrite( ret
);
1210 Trace("regexp-debug") << "... done convert at " << cnt
<< ", with return " << ret
<< std::endl
;
1213 void RegExpOpr::convert2(unsigned cnt
, Node n
, Node
&r1
, Node
&r2
) {
1214 if(n
== d_emptyRegexp
) {
1217 } else if(n
== d_emptySingleton
) {
1218 r1
= d_emptySingleton
;
1219 r2
= d_emptySingleton
;
1220 } else if(n
.getKind() == kind::REGEXP_RV
) {
1221 Assert(n
[0].getConst
<Rational
>() <= Rational(String::maxSize()),
1222 "Exceeded UINT32_MAX in RegExpOpr::convert2");
1223 unsigned y
= n
[0].getConst
<Rational
>().getNumerator().toUnsignedInt();
1224 r1
= d_emptySingleton
;
1230 } else if(n
.getKind() == kind::REGEXP_CONCAT
) {
1232 std::vector
<Node
> vr1
, vr2
;
1233 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1234 if(containC2(cnt
, n
[i
])) {
1236 convert2(cnt
, n
[i
], t1
, t2
);
1238 r1
= vr1
.size()==0 ? d_emptyRegexp
: vr1
.size()==1 ? vr1
[0] :
1239 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vr1
);
1241 for( unsigned j
=i
+1; j
<n
.getNumChildren(); j
++ ) {
1242 vr2
.push_back(n
[j
]);
1244 r2
= vr2
.size()==0 ? d_emptyRegexp
: vr2
.size()==1 ? vr2
[0] :
1245 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vr2
);
1249 vr1
.push_back(n
[i
]);
1253 r1
= d_emptySingleton
;
1256 } else if(n
.getKind() == kind::REGEXP_UNION
) {
1257 std::vector
<Node
> vr1
, vr2
;
1258 for( unsigned i
=0; i
<n
.getNumChildren(); i
++ ) {
1260 convert2(cnt
, n
[i
], t1
, t2
);
1264 r1
= NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vr1
);
1265 r2
= NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vr2
);
1266 } else if(n
.getKind() == kind::STRING_TO_REGEXP
|| n
.getKind() == kind::REGEXP_SIGMA
|| n
.getKind() == kind::REGEXP_RANGE
) {
1267 r1
= d_emptySingleton
;
1269 } else if(n
.getKind() == kind::REGEXP_LOOP
) {
1271 r1
= d_emptySingleton
;
1280 bool RegExpOpr::testNoRV(Node r
) {
1281 std::map
< Node
, bool >::const_iterator itr
= d_norv_cache
.find(r
);
1282 if(itr
!= d_norv_cache
.end()) {
1285 if(r
.getKind() == kind::REGEXP_RV
) {
1287 } else if(r
.getNumChildren() > 1) {
1288 for(unsigned int i
=0; i
<r
.getNumChildren(); i
++) {
1289 if(!testNoRV(r
[i
])) {
1298 Node
RegExpOpr::intersectInternal( Node r1
, Node r2
, std::map
< PairNodes
, Node
> cache
, unsigned cnt
) {
1299 //Assert(checkConstRegExp(r1) && checkConstRegExp(r2));
1305 Trace("regexp-int") << "Starting INTERSECT(" << cnt
<< "):\n "<< mkString(r1
) << ",\n " << mkString(r2
) << std::endl
;
1306 //if(Trace.isOn("regexp-debug")) {
1307 // Trace("regexp-debug") << "... with cache:\n";
1308 // for(std::map< PairNodes, Node >::const_iterator itr=cache.begin();
1309 // itr!=cache.end();itr++) {
1310 // Trace("regexp-debug") << "(" << itr->first.first << "," << itr->first.second << ")->" << itr->second << std::endl;
1313 std::pair
< Node
, Node
> p(r1
, r2
);
1314 std::map
< PairNodes
, Node
>::const_iterator itr
= d_inter_cache
.find(p
);
1316 if(itr
!= d_inter_cache
.end()) {
1317 rNode
= itr
->second
;
1319 Trace("regexp-int-debug") << " ... not in cache" << std::endl
;
1320 if(r1
== d_emptyRegexp
|| r2
== d_emptyRegexp
) {
1321 Trace("regexp-int-debug") << " ... one is empty set" << std::endl
;
1322 rNode
= d_emptyRegexp
;
1323 } else if(r1
== d_emptySingleton
|| r2
== d_emptySingleton
) {
1324 Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl
;
1326 int r
= delta((r1
== d_emptySingleton
? r2
: r1
), exp
);
1331 rNode
= d_emptySingleton
;
1333 rNode
= d_emptyRegexp
;
1335 } else if(r1
== r2
) {
1336 Trace("regexp-int-debug") << " ... equal" << std::endl
;
1337 rNode
= r1
; //convert1(cnt, r1);
1339 Trace("regexp-int-debug") << " ... normal checking" << std::endl
;
1340 std::map
< PairNodes
, Node
>::const_iterator itrcache
= cache
.find(p
);
1341 if(itrcache
!= cache
.end()) {
1342 rNode
= itrcache
->second
;
1344 Trace("regexp-int-debug") << " ... normal without cache" << std::endl
;
1345 std::vector
<unsigned> cset
;
1346 std::set
<unsigned> cset1
, cset2
;
1347 std::set
< Node
> vset1
, vset2
;
1348 firstChars(r1
, cset1
, vset1
);
1349 firstChars(r2
, cset2
, vset2
);
1350 Trace("regexp-int-debug") << " ... got fset" << std::endl
;
1351 std::set_intersection(cset1
.begin(), cset1
.end(), cset2
.begin(), cset2
.end(),
1352 std::inserter(cset
, cset
.begin()));
1353 std::vector
< Node
> vec_nodes
;
1355 Trace("regexp-int-debug") << " ... try delta" << std::endl
;
1356 int flag
= delta(r1
, delta_exp
);
1357 int flag2
= delta(r2
, delta_exp
);
1358 Trace("regexp-int-debug") << " ... delta1=" << flag
<< ", delta2=" << flag2
<< std::endl
;
1359 if(flag
!= 2 && flag2
!= 2) {
1360 if(flag
== 1 && flag2
== 1) {
1361 vec_nodes
.push_back(d_emptySingleton
);
1367 if(Trace
.isOn("regexp-int-debug")) {
1368 Trace("regexp-int-debug") << "Try CSET(" << cset
.size() << ") = {";
1369 for (std::vector
<unsigned>::const_iterator itr
= cset
.begin();
1373 //CVC4::String c( *itr );
1374 if(itr
!= cset
.begin()) {
1375 Trace("regexp-int-debug") << ", ";
1377 Trace("regexp-int-debug") << ( *itr
);
1379 Trace("regexp-int-debug") << std::endl
;
1381 std::map
< PairNodes
, Node
> cacheX
;
1382 for (std::vector
<unsigned>::const_iterator itr
= cset
.begin();
1386 std::vector
<unsigned> cvec
;
1387 cvec
.push_back(String::convertCodeToUnsignedInt(*itr
));
1389 Trace("regexp-int-debug") << "Try character " << c
<< " ... " << std::endl
;
1390 Node r1l
= derivativeSingle(r1
, c
);
1391 Node r2l
= derivativeSingle(r2
, c
);
1392 Trace("regexp-int-debug") << " ... got partial(r1,c) = " << mkString(r1l
) << std::endl
;
1393 Trace("regexp-int-debug") << " ... got partial(r2,c) = " << mkString(r2l
) << std::endl
;
1398 r1l
= r2l
; r2l
= tnode
;
1400 PairNodes
pp(r1l
, r2l
);
1401 std::map
< PairNodes
, Node
>::const_iterator itr2
= cacheX
.find(pp
);
1402 if(itr2
!= cacheX
.end()) {
1405 std::map
< PairNodes
, Node
> cache2(cache
);
1406 cache2
[ p
] = NodeManager::currentNM()->mkNode(kind::REGEXP_RV
, NodeManager::currentNM()->mkConst(CVC4::Rational(cnt
)));
1407 rt
= intersectInternal(r1l
, r2l
, cache2
, cnt
+1);
1411 rt
= Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
,
1412 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst(c
)), rt
) );
1414 Trace("regexp-int-debug") << " ... got p(r1,c) && p(r2,c) = " << mkString(rt
) << std::endl
;
1415 vec_nodes
.push_back(rt
);
1417 rNode
= Rewriter::rewrite( vec_nodes
.size()==0 ? d_emptyRegexp
: vec_nodes
.size()==1 ? vec_nodes
[0] :
1418 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vec_nodes
) );
1419 rNode
= convert1(cnt
, rNode
);
1420 rNode
= Rewriter::rewrite( rNode
);
1423 Trace("regexp-int-debug") << " ... try testing no RV of " << mkString(rNode
) << std::endl
;
1424 if(testNoRV(rNode
)) {
1425 d_inter_cache
[p
] = rNode
;
1428 Trace("regexp-int") << "End(" << cnt
<< ") of INTERSECT( " << mkString(r1
) << ", " << mkString(r2
) << " ) = " << mkString(rNode
) << std::endl
;
1432 Node
RegExpOpr::removeIntersection(Node r
) {
1433 Assert( checkConstRegExp(r
) );
1434 std::map
< Node
, Node
>::const_iterator itr
= d_rm_inter_cache
.find(r
);
1436 if(itr
!= d_rm_inter_cache
.end()) {
1437 retNode
= itr
->second
;
1439 switch(r
.getKind()) {
1440 case kind::REGEXP_EMPTY
: {
1444 case kind::REGEXP_SIGMA
: {
1448 case kind::REGEXP_RANGE
: {
1452 case kind::STRING_TO_REGEXP
: {
1456 case kind::REGEXP_CONCAT
: {
1457 std::vector
< Node
> vec_nodes
;
1458 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
1459 Node tmpNode
= removeIntersection( r
[i
] );
1460 vec_nodes
.push_back( tmpNode
);
1462 retNode
= Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT
, vec_nodes
) );
1465 case kind::REGEXP_UNION
: {
1466 std::vector
< Node
> vec_nodes
;
1467 for(unsigned i
=0; i
<r
.getNumChildren(); i
++) {
1468 Node tmpNode
= removeIntersection( r
[i
] );
1469 vec_nodes
.push_back( tmpNode
);
1471 retNode
= Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_UNION
, vec_nodes
) );
1474 case kind::REGEXP_INTER
: {
1475 retNode
= removeIntersection( r
[0] );
1476 for(unsigned i
=1; i
<r
.getNumChildren(); i
++) {
1477 bool spflag
= false;
1478 Node tmpNode
= removeIntersection( r
[i
] );
1479 retNode
= intersect( retNode
, tmpNode
, spflag
);
1483 case kind::REGEXP_STAR
: {
1484 retNode
= removeIntersection( r
[0] );
1485 retNode
= Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_STAR
, retNode
) );
1488 case kind::REGEXP_LOOP
: {
1489 retNode
= removeIntersection( r
[0] );
1490 retNode
= Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP
, retNode
, r
[1], r
[2]) );
1497 d_rm_inter_cache
[r
] = retNode
;
1499 Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r
) << " ) = " << mkString(retNode
) << std::endl
;
1503 Node
RegExpOpr::intersect(Node r1
, Node r2
, bool &spflag
) {
1504 if(checkConstRegExp(r1
) && checkConstRegExp(r2
)) {
1505 Node rr1
= removeIntersection(r1
);
1506 Node rr2
= removeIntersection(r2
);
1507 std::map
< PairNodes
, Node
> cache
;
1508 Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1
) << ",\n\t"<< mkString(r2
) << ")" << std::endl
;
1509 Node retNode
= intersectInternal(rr1
, rr2
, cache
, 1);
1510 Trace("regexp-intersect") << "End INTERSECTION(\n\t" << mkString(r1
) << ",\n\t"<< mkString(r2
) << ") =\n\t" << mkString(retNode
) << std::endl
;
1514 return Node::null();
1519 std::string
RegExpOpr::niceChar(Node r
) {
1521 std::string s
= r
.getConst
<CVC4::String
>().toString() ;
1522 return s
== "." ? "\\." : s
;
1524 std::string ss
= "$" + r
.toString();
1528 std::string
RegExpOpr::mkString( Node r
) {
1533 int k
= r
.getKind();
1535 case kind::REGEXP_EMPTY
: {
1539 case kind::REGEXP_SIGMA
: {
1543 case kind::STRING_TO_REGEXP
: {
1544 std::string
tmp( niceChar( r
[0] ) );
1545 retStr
+= tmp
.size()==1? tmp
: "(" + tmp
+ ")";
1548 case kind::REGEXP_CONCAT
: {
1550 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1551 //if(i != 0) retStr += ".";
1552 retStr
+= mkString( r
[i
] );
1557 case kind::REGEXP_UNION
: {
1559 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1560 if(i
!= 0) retStr
+= "|";
1561 retStr
+= mkString( r
[i
] );
1566 case kind::REGEXP_INTER
: {
1568 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
1569 if(i
!= 0) retStr
+= "&";
1570 retStr
+= mkString( r
[i
] );
1575 case kind::REGEXP_STAR
: {
1576 retStr
+= mkString( r
[0] );
1580 case kind::REGEXP_PLUS
: {
1581 retStr
+= mkString( r
[0] );
1585 case kind::REGEXP_OPT
: {
1586 retStr
+= mkString( r
[0] );
1590 case kind::REGEXP_RANGE
: {
1592 retStr
+= niceChar( r
[0] );
1594 retStr
+= niceChar( r
[1] );
1598 case kind::REGEXP_LOOP
: {
1600 retStr
+= mkString(r
[0]);
1603 retStr
+= r
[1].getConst
<Rational
>().toString();
1605 if(r
.getNumChildren() == 3) {
1606 retStr
+= r
[2].getConst
<Rational
>().toString();
1611 case kind::REGEXP_RV
: {
1613 retStr
+= r
[0].getConst
<Rational
>().getNumerator().toString();
1618 Trace("strings-error") << "Unsupported term: " << r
<< " in RegExp." << std::endl
;
1620 //return Node::null();
1627 }/* CVC4::theory::strings namespace */
1628 }/* CVC4::theory namespace */
1629 }/* CVC4 namespace */