1 /********************* */
2 /*! \file regexp_solver.cpp
4 ** Top contributors (to current version):
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
12 ** \brief Implementation of the regular expression solver for the theory of
17 #include "theory/strings/regexp_solver.h"
21 #include "options/strings_options.h"
22 #include "theory/strings/theory_strings.h"
23 #include "theory/strings/theory_strings_rewriter.h"
24 #include "theory/theory_model.h"
27 using namespace CVC4::context
;
28 using namespace CVC4::kind
;
34 RegExpSolver::RegExpSolver(TheoryStrings
& p
,
36 context::UserContext
* u
)
38 d_regexp_memberships(c
),
45 d_processed_memberships(c
)
47 d_emptyString
= NodeManager::currentNM()->mkConst(::CVC4::String(""));
48 std::vector
<Node
> nvec
;
49 d_emptyRegexp
= NodeManager::currentNM()->mkNode(REGEXP_EMPTY
, nvec
);
50 d_true
= NodeManager::currentNM()->mkConst(true);
51 d_false
= NodeManager::currentNM()->mkConst(false);
54 unsigned RegExpSolver::getNumMemberships(Node n
, bool isPos
)
58 NodeUIntMap::const_iterator it
= d_pos_memberships
.find(n
);
59 if (it
!= d_pos_memberships
.end())
66 NodeUIntMap::const_iterator it
= d_neg_memberships
.find(n
);
67 if (it
!= d_neg_memberships
.end())
75 Node
RegExpSolver::getMembership(Node n
, bool isPos
, unsigned i
)
77 return isPos
? d_pos_memberships_data
[n
][i
] : d_neg_memberships_data
[n
][i
];
80 Node
RegExpSolver::mkAnd(Node c1
, Node c2
)
82 return NodeManager::currentNM()->mkNode(AND
, c1
, c2
);
85 void RegExpSolver::check()
87 bool addedLemma
= false;
89 std::vector
<Node
> processed
;
90 std::vector
<Node
> cprocessed
;
92 Trace("regexp-debug") << "Checking Memberships ... " << std::endl
;
93 for (NodeUIntMap::const_iterator itr_xr
= d_pos_memberships
.begin();
94 itr_xr
!= d_pos_memberships
.end();
98 Node x
= (*itr_xr
).first
;
99 Trace("regexp-debug") << "Checking Memberships for " << x
<< std::endl
;
100 if (d_inter_index
.find(x
) == d_inter_index
.end())
102 d_inter_index
[x
] = 0;
104 int cur_inter_idx
= d_inter_index
[x
];
105 unsigned n_pmem
= (*itr_xr
).second
;
106 Assert(getNumMemberships(x
, true) == n_pmem
);
107 if (cur_inter_idx
!= (int)n_pmem
)
111 d_inter_cache
[x
] = getMembership(x
, true, 0);
112 d_inter_index
[x
] = 1;
113 Trace("regexp-debug") << "... only one choice " << std::endl
;
118 if (d_inter_cache
.find(x
) != d_inter_cache
.end())
120 r
= d_inter_cache
[x
];
124 r
= getMembership(x
, true, 0);
128 unsigned k_start
= cur_inter_idx
;
129 Trace("regexp-debug") << "... staring from : " << cur_inter_idx
130 << ", we have " << n_pmem
<< std::endl
;
131 for (unsigned k
= k_start
; k
< n_pmem
; k
++)
133 Node r2
= getMembership(x
, true, k
);
134 r
= d_regexp_opr
.intersect(r
, r2
, spflag
);
139 else if (r
== d_emptyRegexp
)
141 std::vector
<Node
> vec_nodes
;
142 for (unsigned kk
= 0; kk
<= k
; kk
++)
144 Node rr
= getMembership(x
, true, kk
);
146 NodeManager::currentNM()->mkNode(STRING_IN_REGEXP
, x
, rr
);
147 vec_nodes
.push_back(n
);
150 d_parent
.sendInference(vec_nodes
, conc
, "INTERSECT CONFLICT", true);
154 if (d_parent
.inConflict())
160 if (!d_parent
.inConflict() && !spflag
)
162 d_inter_cache
[x
] = r
;
163 d_inter_index
[x
] = (int)n_pmem
;
169 Trace("regexp-debug")
170 << "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
174 NodeManager
* nm
= NodeManager::currentNM();
175 for (unsigned i
= 0; i
< d_regexp_memberships
.size(); i
++)
177 // check regular expression membership
178 Node assertion
= d_regexp_memberships
[i
];
179 Trace("regexp-debug")
180 << "Check : " << assertion
<< " "
181 << (d_regexp_ucached
.find(assertion
) == d_regexp_ucached
.end()) << " "
182 << (d_regexp_ccached
.find(assertion
) == d_regexp_ccached
.end())
184 if (d_regexp_ucached
.find(assertion
) == d_regexp_ucached
.end()
185 && d_regexp_ccached
.find(assertion
) == d_regexp_ccached
.end())
187 Trace("strings-regexp")
188 << "We have regular expression assertion : " << assertion
190 Node atom
= assertion
.getKind() == NOT
? assertion
[0] : assertion
;
191 bool polarity
= assertion
.getKind() != NOT
;
195 std::vector
<Node
> rnfexp
;
199 x
= d_parent
.getNormalString(x
, rnfexp
);
202 if (!d_regexp_opr
.checkConstRegExp(r
))
204 r
= getNormalSymRegExp(r
, rnfexp
);
207 Trace("strings-regexp-nf") << "Term " << atom
<< " is normalized to "
208 << x
<< " IN " << r
<< std::endl
;
211 Node tmp
= Rewriter::rewrite(nm
->mkNode(STRING_IN_REGEXP
, x
, r
));
218 d_regexp_ccached
.insert(assertion
);
221 else if (tmp
== d_false
)
223 std::vector
<Node
> exp_n
;
224 exp_n
.push_back(assertion
);
225 Node conc
= Node::null();
226 d_parent
.sendInference(rnfexp
, exp_n
, conc
, "REGEXP NF Conflict");
234 flag
= checkPDerivative(x
, r
, atom
, addedLemma
, rnfexp
);
238 if (!options::stringExp())
240 throw LogicException(
241 "Strings Incomplete (due to Negative Membership) by default, "
242 "try --strings-exp option.");
247 // check if the term is atomic
248 Node xr
= d_parent
.getRepresentative(x
);
249 Trace("strings-regexp")
250 << "Unroll/simplify membership of atomic term " << xr
252 // if so, do simple unrolling
253 std::vector
<Node
> nvec
;
256 d_regexp_opr
.simplify(atom
, nvec
, polarity
);
258 std::vector
<Node
> exp_n
;
259 exp_n
.push_back(assertion
);
260 Node conc
= nvec
.size() == 1 ? nvec
[0] : nm
->mkNode(AND
, nvec
);
261 conc
= Rewriter::rewrite(conc
);
262 d_parent
.sendInference(rnfexp
, exp_n
, conc
, "REGEXP_Unfold");
266 cprocessed
.push_back(assertion
);
270 processed
.push_back(assertion
);
274 if (d_parent
.inConflict())
282 if (!d_parent
.inConflict())
284 for (unsigned i
= 0; i
< processed
.size(); i
++)
286 Trace("strings-regexp")
287 << "...add " << processed
[i
] << " to u-cache." << std::endl
;
288 d_regexp_ucached
.insert(processed
[i
]);
290 for (unsigned i
= 0; i
< cprocessed
.size(); i
++)
292 Trace("strings-regexp")
293 << "...add " << cprocessed
[i
] << " to c-cache." << std::endl
;
294 d_regexp_ccached
.insert(cprocessed
[i
]);
300 bool RegExpSolver::checkPDerivative(
301 Node x
, Node r
, Node atom
, bool& addedLemma
, std::vector
<Node
>& nf_exp
)
303 if (d_parent
.areEqual(x
, d_emptyString
))
306 switch (d_regexp_opr
.delta(r
, exp
))
310 std::vector
<Node
> exp_n
;
311 exp_n
.push_back(atom
);
312 exp_n
.push_back(x
.eqNode(d_emptyString
));
313 d_parent
.sendInference(nf_exp
, exp_n
, exp
, "RegExp Delta");
315 d_regexp_ccached
.insert(atom
);
320 d_regexp_ccached
.insert(atom
);
325 std::vector
<Node
> exp_n
;
326 exp_n
.push_back(atom
);
327 exp_n
.push_back(x
.eqNode(d_emptyString
));
329 d_parent
.sendInference(nf_exp
, exp_n
, conc
, "RegExp Delta CONFLICT");
331 d_regexp_ccached
.insert(atom
);
341 if (deriveRegExp(x
, r
, atom
, nf_exp
))
344 d_regexp_ccached
.insert(atom
);
351 CVC4::String
RegExpSolver::getHeadConst(Node x
)
355 return x
.getConst
<String
>();
357 else if (x
.getKind() == STRING_CONCAT
)
361 return x
[0].getConst
<String
>();
364 return d_emptyString
.getConst
<String
>();
367 bool RegExpSolver::deriveRegExp(Node x
,
370 std::vector
<Node
>& ant
)
372 Assert(x
!= d_emptyString
);
373 Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x
374 << ", r= " << r
<< std::endl
;
375 CVC4::String s
= getHeadConst(x
);
376 if (!s
.isEmptyString() && d_regexp_opr
.checkConstRegExp(r
))
378 Node conc
= Node::null();
381 for (unsigned i
= 0; i
< s
.size(); ++i
)
383 CVC4::String c
= s
.substr(i
, 1);
385 int rt
= d_regexp_opr
.derivativeS(dc
, c
, dc2
);
400 "Impossible: RegExpSolver::deriveRegExp: const string in const "
401 "regular expression.");
406 Assert(x
.getKind() == STRING_CONCAT
);
407 std::vector
<Node
> vec_nodes
;
408 for (unsigned int i
= 1; i
< x
.getNumChildren(); ++i
)
410 vec_nodes
.push_back(x
[i
]);
412 Node left
= TheoryStringsRewriter::mkConcat(STRING_CONCAT
, vec_nodes
);
413 left
= Rewriter::rewrite(left
);
414 conc
= NodeManager::currentNM()->mkNode(STRING_IN_REGEXP
, left
, dc
);
417 std::vector
<Node
> exp_n
;
418 exp_n
.push_back(atom
);
419 d_parent
.sendInference(ant
, exp_n
, conc
, "RegExp-Derive");
425 void RegExpSolver::addMembership(Node assertion
)
427 bool polarity
= assertion
.getKind() != NOT
;
428 TNode atom
= polarity
? assertion
: assertion
[0];
434 NodeUIntMap::const_iterator it
= d_pos_memberships
.find(x
);
435 if (it
!= d_pos_memberships
.end())
437 index
= (*it
).second
;
438 for (unsigned k
= 0; k
< index
; k
++)
440 if (k
< d_pos_memberships_data
[x
].size())
442 if (d_pos_memberships_data
[x
][k
] == r
)
453 d_pos_memberships
[x
] = index
+ 1;
454 if (index
< d_pos_memberships_data
[x
].size())
456 d_pos_memberships_data
[x
][index
] = r
;
460 d_pos_memberships_data
[x
].push_back(r
);
463 else if (!options::stringIgnNegMembership())
466 NodeUIntMap::const_iterator it
= d_neg_memberships
.find(x
);
467 if (it
!= d_neg_memberships
.end())
469 index
= (*it
).second
;
470 for (unsigned k
= 0; k
< index
; k
++)
472 if (k
< d_neg_memberships_data
[x
].size())
474 if (d_neg_memberships_data
[x
][k
] == r
)
485 d_neg_memberships
[x
] = index
+ 1;
486 if (index
< d_neg_memberships_data
[x
].size())
488 d_neg_memberships_data
[x
][index
] = r
;
492 d_neg_memberships_data
[x
].push_back(r
);
496 if (polarity
|| !options::stringIgnNegMembership())
498 d_regexp_memberships
.push_back(assertion
);
502 Node
RegExpSolver::getNormalSymRegExp(Node r
, std::vector
<Node
>& nf_exp
)
508 case REGEXP_SIGMA
: break;
509 case STRING_TO_REGEXP
:
513 Node tmp
= d_parent
.getNormalString(r
[0], nf_exp
);
516 ret
= NodeManager::currentNM()->mkNode(STRING_TO_REGEXP
, tmp
);
526 std::vector
<Node
> vec_nodes
;
527 for (const Node
& cr
: r
)
529 vec_nodes
.push_back(getNormalSymRegExp(cr
, nf_exp
));
531 ret
= Rewriter::rewrite(
532 NodeManager::currentNM()->mkNode(r
.getKind(), vec_nodes
));
537 Trace("strings-error") << "Unsupported term: " << r
538 << " in normalization SymRegExp." << std::endl
;
545 } // namespace strings
546 } // namespace theory