if(PYTHONINTERP_FOUND AND CxxTest_PYTHON_TESTGEN_EXECUTABLE)
set(CxxTest_TESTGEN_EXECUTABLE ${CxxTest_PYTHON_TESTGEN_EXECUTABLE})
- set(CxxTest_TESTGEN_INTERPRETER ${PYTHON_EXECUTABLE})
+ # set(CxxTest_TESTGEN_INTERPRETER ${PYTHON_EXECUTABLE})
elseif(PERL_FOUND AND CxxTest_PERL_TESTGEN_EXECUTABLE)
set(CxxTest_TESTGEN_EXECUTABLE ${CxxTest_PERL_TESTGEN_EXECUTABLE})
set(CxxTest_TESTGEN_INTERPRETER ${PERL_EXECUTABLE})
#include "expr/kind.h"
#include "options/strings_options.h"
#include "theory/strings/theory_strings_rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
using namespace CVC4;
using namespace CVC4::kind;
return retStr;
}
+bool RegExpOpr::regExpIncludes(Node r1, Node r2)
+{
+ Assert(Rewriter::rewrite(r1) == r1);
+ Assert(Rewriter::rewrite(r2) == r2);
+
+ if (r1 == r2)
+ {
+ return true;
+ }
+
+ // This method only works on a fragment of regular expressions
+ if (!utils::isSimpleRegExp(r1) || !utils::isSimpleRegExp(r2))
+ {
+ return false;
+ }
+
+ const auto& it = d_inclusionCache.find(std::make_pair(r1, r2));
+ if (it != d_inclusionCache.end())
+ {
+ return (*it).second;
+ }
+
+ std::vector<Node> v1, v2;
+ utils::getRegexpComponents(r1, v1);
+ utils::getRegexpComponents(r2, v2);
+
+ // In the following, we iterate over `r2` (the "includee") and try to
+ // match it with `r1`. `idxs`/`newIdxs` keep track of all the possible
+ // positions in `r1` that we could currently be at.
+ std::unordered_set<size_t> newIdxs = {0};
+ std::unordered_set<size_t> idxs;
+ for (const Node& n2 : v2)
+ {
+ // Transfer elements from `newIdxs` to `idxs`. Out-of-bound indices are
+ // removed and for (re.* re.allchar), we additionally include the
+ // option of skipping it.
+ idxs.clear();
+ for (size_t idx : newIdxs)
+ {
+ if (idx < v1.size())
+ {
+ idxs.insert(idx);
+ if (v1[idx] == d_sigma_star)
+ {
+ Assert(idx + 1 == v1.size() || v1[idx + 1] != d_sigma_star);
+ idxs.insert(idx + 1);
+ }
+ }
+ }
+ newIdxs.clear();
+
+ if (n2.getKind() == STRING_TO_REGEXP || n2 == d_sigma)
+ {
+ Assert(n2 == d_sigma
+ || (n2[0].isConst() && n2[0].getConst<String>().size() == 1));
+ for (size_t idx : idxs)
+ {
+ if (v1[idx] == d_sigma || v1[idx] == n2)
+ {
+ // Given a character or an re.allchar in `r2`, we can either
+ // match it with a corresponding character in `r1` or an
+ // re.allchar
+ newIdxs.insert(idx + 1);
+ }
+ }
+ }
+
+ for (size_t idx : idxs)
+ {
+ if (v1[idx] == d_sigma_star)
+ {
+ // (re.* re.allchar) can match an arbitrary amount of `r2`
+ newIdxs.insert(idx);
+ }
+ else if (utils::isUnboundedWildcard(v1, idx))
+ {
+ // If a series of re.allchar is followed by (re.* re.allchar), we
+ // can decide not to "waste" the re.allchar because the order of
+ // the two wildcards is not observable (i.e. it does not change
+ // the sequences matched by the regular expression)
+ newIdxs.insert(idx);
+ }
+ }
+
+ if (newIdxs.empty())
+ {
+ // If there are no candidates, we can't match the remainder of r2
+ d_inclusionCache[std::make_pair(r1, r2)] = false;
+ return false;
+ }
+ }
+
+ // We have processed all of `r2`. We are now looking if there was also a
+ // path to the end in `r1`. This makes sure that we don't have leftover
+ // bits in `r1` that don't match anything in `r2`.
+ bool result = false;
+ for (size_t idx : newIdxs)
+ {
+ if (idx == v1.size() || (idx == v1.size() - 1 && v1[idx] == d_sigma_star))
+ {
+ result = true;
+ break;
+ }
+ }
+
+ d_inclusionCache[std::make_pair(r1, r2)] = result;
+ return result;
+}
+
}/* CVC4::theory::strings namespace */
}/* CVC4::theory namespace */
}/* CVC4 namespace */
std::map<Node, Node> d_rm_inter_cache;
std::map<Node, bool> d_norv_cache;
std::map<Node, std::vector<PairNodes> > d_split_cache;
+ std::map<PairNodes, bool> d_inclusionCache;
void simplifyPRegExp(Node s, Node r, std::vector<Node> &new_nodes);
void simplifyNRegExp(Node s, Node r, std::vector<Node> &new_nodes);
/**
Node intersect(Node r1, Node r2, bool &spflag);
/** Get the pretty printed version of the regular expression r */
static std::string mkString(Node r);
+
+ /**
+ * Returns true if we can show that the regular expression `r1` includes
+ * the regular expression `r2` (i.e. `r1` matches a superset of sequences
+ * that `r2` matches). This method only works on a fragment of regular
+ * expressions, specifically regular expressions that pass the
+ * `isSimpleRegExp` check.
+ *
+ * @param r1 The regular expression that may include `r2` (must be in
+ * rewritten form)
+ * @param r2 The regular expression that may be included by `r1` (must be
+ * in rewritten form)
+ * @return True if the inclusion can be shown, false otherwise
+ */
+ bool regExpIncludes(Node r1, Node r2);
};
}/* CVC4::theory::strings namespace */
Trace("regexp-process") << "Checking Memberships ... " << std::endl;
for (const std::pair<const Node, std::vector<Node> >& mr : mems)
{
+ std::vector<Node> mems = mr.second;
Trace("regexp-process")
<< "Memberships(" << mr.first << ") = " << mr.second << std::endl;
- if (!checkEqcIntersect(mr.second))
+ if (!checkEqcInclusion(mems))
+ {
+ // conflict discovered, return
+ return;
+ }
+ if (!checkEqcIntersect(mems))
{
// conflict discovered, return
return;
}
}
+bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
+{
+ std::unordered_set<Node, NodeHashFunction> remove;
+
+ for (const Node& m1 : mems)
+ {
+ bool m1Neg = m1.getKind() == NOT;
+ Node m1Lit = m1Neg ? m1[0] : m1;
+
+ if (remove.find(m1) != remove.end())
+ {
+ // Skip memberships marked for removal
+ continue;
+ }
+
+ for (const Node& m2 : mems)
+ {
+ if (m1 == m2)
+ {
+ continue;
+ }
+
+ bool m2Neg = m2.getKind() == NOT;
+ Node m2Lit = m2Neg ? m2[0] : m2;
+
+ // Both regular expression memberships have the same polarity
+ if (m1Neg == m2Neg)
+ {
+ if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
+ {
+ if (m1Neg)
+ {
+ // ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
+ // mark ~str.in.re(x, R2) as reduced
+ d_parent.getExtTheory()->markReduced(m2Lit);
+ remove.insert(m2);
+ }
+ else
+ {
+ // str.in.re(x, R1) includes str.in.re(x, R2) --->
+ // mark str.in.re(x, R1) as reduced
+ d_parent.getExtTheory()->markReduced(m1Lit);
+ remove.insert(m1);
+
+ // We don't need to process m1 anymore
+ break;
+ }
+ }
+ }
+ else
+ {
+ Node pos = m1Neg ? m2Lit : m1Lit;
+ Node neg = m1Neg ? m1Lit : m2Lit;
+ if (d_regexp_opr.regExpIncludes(neg[1], pos[1]))
+ {
+ // We have a conflict because we have a case where str.in.re(x, R1)
+ // and ~str.in.re(x, R2) but R2 includes R1, so there is no
+ // possible value for x that satisfies both memberships.
+ std::vector<Node> vec_nodes;
+ vec_nodes.push_back(pos);
+ vec_nodes.push_back(neg.negate());
+
+ if (pos[0] != neg[0])
+ {
+ vec_nodes.push_back(pos[0].eqNode(neg[0]));
+ }
+
+ Node conc;
+ d_im.sendInference(vec_nodes, conc, "Intersect inclusion", true);
+ return false;
+ }
+ }
+ }
+ }
+
+ mems.erase(std::remove_if(
+ mems.begin(),
+ mems.end(),
+ [&remove](Node& n) { return remove.find(n) != remove.end(); }),
+ mems.end());
+
+ return true;
+}
+
bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
{
// do not compute intersections if the re intersection mode is none
void check(const std::map<Node, std::vector<Node>>& mems);
private:
+ /**
+ * Check memberships in equivalence class for regular expression
+ * inclusion.
+ *
+ * This method returns false if it discovered a conflict for this set of
+ * assertions, and true otherwise. It discovers a conflict e.g. if mems
+ * contains str.in.re(xi, Ri) and ~str.in.re(xj, Rj) and Rj includes Ri.
+ *
+ * @param mems Vector of memberships of the form: (~)str.in.re(x1, R1)
+ * ... (~)str.in.re(xn, Rn) where x1 = ... = xn in the
+ * current context. The function removes elements from this
+ * vector that were marked as reduced.
+ * @param expForRe Additional explanations for regular expressions.
+ * @return False if a conflict was detected, true otherwise
+ */
+ bool checkEqcInclusion(std::vector<Node>& mems);
+
/**
* Check memberships for equivalence class.
* The vector mems is a vector of memberships of the form:
return getConstantComponent(e);
}
+bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
+{
+ size_t i = start;
+ while (i < rs.size() && rs[i].getKind() == REGEXP_SIGMA)
+ {
+ i++;
+ }
+
+ if (i >= rs.size())
+ {
+ return false;
+ }
+
+ return rs[i].getKind() == REGEXP_STAR && rs[i][0].getKind() == REGEXP_SIGMA;
+}
+
+bool isSimpleRegExp(Node r)
+{
+ Assert(r.getType().isRegExp());
+
+ std::vector<Node> v;
+ utils::getConcat(r, v);
+ for (const Node& n : v)
+ {
+ if (n.getKind() == STRING_TO_REGEXP)
+ {
+ if (!n[0].isConst())
+ {
+ return false;
+ }
+ }
+ else if (n.getKind() != REGEXP_SIGMA
+ && (n.getKind() != REGEXP_STAR || n[0].getKind() != REGEXP_SIGMA))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+void getRegexpComponents(Node r, std::vector<Node>& result)
+{
+ Assert(r.getType().isRegExp());
+
+ NodeManager* nm = NodeManager::currentNM();
+ if (r.getKind() == REGEXP_CONCAT)
+ {
+ for (const Node& n : r)
+ {
+ getRegexpComponents(n, result);
+ }
+ }
+ else if (r.getKind() == STRING_TO_REGEXP && r[0].isConst())
+ {
+ String s = r[0].getConst<String>();
+ for (size_t i = 0, size = s.size(); i < size; i++)
+ {
+ result.push_back(
+ nm->mkNode(STRING_TO_REGEXP, nm->mkConst(s.substr(i, 1))));
+ }
+ }
+ else
+ {
+ result.push_back(r);
+ }
+}
+
} // namespace utils
} // namespace strings
} // namespace theory
*/
Node getConstantEndpoint(Node e, bool isSuf);
+/**
+ * Given a vector of regular expression nodes and a start index that points to
+ * a wildcard, returns true if the wildcard is unbounded (i.e. it is followed
+ * by an arbitrary number of `re.allchar`s and then an `re.*(re.allchar)`. If
+ * the start index is not a wilcard or the wildcards are not followed by
+ * `re.*(re.allchar)`, the function returns false.
+ *
+ * @param rs The vector of regular expression nodes
+ * @param start The start index to consider
+ * @return True if the wilcard pointed to by `start` is unbounded, false
+ * otherwise
+ */
+bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start);
+
+/**
+ * Returns true iff the given regular expression only consists of re.++,
+ * re.allchar, (re.* re.allchar), and str.to.re of string literals.
+ *
+ * @param r The regular expression to check
+ * @return True if the regular expression is simple
+ */
+bool isSimpleRegExp(Node r);
+
+/**
+ * Helper function that takes a regular expression concatenation and
+ * returns the components of the concatenation. Letters of string literals
+ * are treated as individual components.
+ *
+ * @param r The regular expression
+ * @param result The resulting components
+ */
+void getRegexpComponents(Node r, std::vector<Node>& result);
+
} // namespace utils
} // namespace strings
} // namespace theory
regress0/strings/norn-31.smt2
regress0/strings/norn-simp-rew.smt2
regress0/strings/re.all.smt2
+ regress0/strings/regexp_inclusion.smt2
+ regress0/strings/regexp_inclusion_reduction.smt2
regress0/strings/repl-rewrites2.smt2
regress0/strings/replace-const.smt2
regress0/strings/replaceall-eval.smt2
--- /dev/null
+; COMMAND-LINE: --strings-exp --no-re-elim
+(set-info :status unsat)
+(set-logic ALL)
+(declare-const actionName String)
+
+(assert
+(str.in.re actionName (re.++ (str.to.re "wiz") (re.* re.allchar) (str.to.re "foobar") (re.* re.allchar) (str.to.re "baz/") (re.* re.allchar))))
+
+(assert (not
+(str.in.re actionName (re.++ (str.to.re "wiz") (re.* re.allchar) (re.++ (str.to.re "foo") re.allchar (str.to.re "ar")) (re.* re.allchar) (str.to.re "baz/") (re.* re.allchar)))
+))
+
+(check-sat)
--- /dev/null
+; COMMAND-LINE: --strings-exp --no-re-elim
+(set-info :status sat)
+(set-logic QF_SLIA)
+(declare-const x String)
+(declare-const y String)
+
+(assert (str.in.re x (re.++ (str.to.re "bar") (re.* re.allchar) (str.to.re "bar"))))
+(assert (str.in.re x (re.++ (str.to.re "ba") re.allchar (re.* re.allchar) (str.to.re "bar"))))
+
+(assert (not (str.in.re y (re.++ (str.to.re "bar") (re.* re.allchar) (str.to.re "bar")))))
+(assert (not (str.in.re y (re.++ (str.to.re "ba") re.allchar (re.* re.allchar) (str.to.re "bar")))))
+(assert (not (= y "")))
+
+(check-sat)
+cvc4_add_unit_test_black(regexp_operation_black theory)
cvc4_add_unit_test_black(theory_black theory)
cvc4_add_unit_test_white(evaluator_white theory)
cvc4_add_unit_test_white(logic_info_white theory)
--- /dev/null
+/********************* */
+/*! \file regexp_operation_black.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ ** Andres Noetzli
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved. See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Unit tests for symbolic regular expression operations
+ **
+ ** Unit tests for symbolic regular expression operations.
+ **/
+
+#include "expr/node.h"
+#include "expr/node_manager.h"
+#include "smt/smt_engine.h"
+#include "smt/smt_engine_scope.h"
+#include "theory/strings/regexp_operation.h"
+
+#include <cxxtest/TestSuite.h>
+#include <iostream>
+#include <memory>
+#include <vector>
+
+using namespace CVC4;
+using namespace CVC4::kind;
+using namespace CVC4::smt;
+using namespace CVC4::theory;
+using namespace CVC4::theory::strings;
+
+class RegexpOperationBlack : public CxxTest::TestSuite
+{
+ public:
+ RegexpOperationBlack() {}
+
+ void setUp() override
+ {
+ Options opts;
+ opts.setOutputLanguage(language::output::LANG_SMTLIB_V2);
+ d_em = new ExprManager(opts);
+ d_smt = new SmtEngine(d_em);
+ d_scope = new SmtScope(d_smt);
+ d_regExpOpr = new RegExpOpr();
+
+ d_nm = NodeManager::currentNM();
+ }
+
+ void tearDown() override
+ {
+ delete d_regExpOpr;
+ delete d_scope;
+ delete d_smt;
+ delete d_em;
+ }
+
+ void includes(Node r1, Node r2)
+ {
+ r1 = Rewriter::rewrite(r1);
+ r2 = Rewriter::rewrite(r2);
+ std::cout << r1 << " includes " << r2 << std::endl;
+ TS_ASSERT(d_regExpOpr->regExpIncludes(r1, r2));
+ }
+
+ void doesNotInclude(Node r1, Node r2)
+ {
+ r1 = Rewriter::rewrite(r1);
+ r2 = Rewriter::rewrite(r2);
+ std::cout << r1 << " does not include " << r2 << std::endl;
+ TS_ASSERT(!d_regExpOpr->regExpIncludes(r1, r2));
+ }
+
+ void testBasic()
+ {
+ Node sigma = d_nm->mkNode(REGEXP_SIGMA, std::vector<Node>{});
+ Node sigmaStar = d_nm->mkNode(REGEXP_STAR, sigma);
+ Node a = d_nm->mkNode(STRING_TO_REGEXP, d_nm->mkConst(String("a")));
+ Node c = d_nm->mkNode(STRING_TO_REGEXP, d_nm->mkConst(String("c")));
+ Node abc = d_nm->mkNode(STRING_TO_REGEXP, d_nm->mkConst(String("abc")));
+ Node sigma3 = d_nm->mkNode(REGEXP_CONCAT, sigma, sigma, sigma);
+ Node asc = d_nm->mkNode(REGEXP_CONCAT, a, sigma, c);
+ Node asw = d_nm->mkNode(REGEXP_CONCAT, a, sigma, sigmaStar);
+
+ includes(sigma3, abc);
+ doesNotInclude(abc, sigma3);
+
+ includes(sigma3, asc);
+ doesNotInclude(asc, sigma3);
+
+ includes(asc, abc);
+ doesNotInclude(abc, asc);
+
+ includes(asw, asc);
+ doesNotInclude(asc, asw);
+ }
+
+ void testStarWildcards()
+ {
+ Node sigma = d_nm->mkNode(REGEXP_SIGMA, std::vector<Node>{});
+ Node sigmaStar = d_nm->mkNode(REGEXP_STAR, sigma);
+ Node a = d_nm->mkNode(STRING_TO_REGEXP, d_nm->mkConst(String("a")));
+ Node c = d_nm->mkNode(STRING_TO_REGEXP, d_nm->mkConst(String("c")));
+ Node abc = d_nm->mkNode(STRING_TO_REGEXP, d_nm->mkConst(String("abc")));
+
+ Node _abc_ = d_nm->mkNode(REGEXP_CONCAT, sigmaStar, abc, sigmaStar);
+ Node _asc_ = d_nm->mkNode(REGEXP_CONCAT, sigmaStar, a, sigma, c, sigmaStar);
+ Node _sc_ = Rewriter::rewrite(
+ d_nm->mkNode(REGEXP_CONCAT, sigmaStar, sigma, c, sigmaStar));
+ Node _as_ = Rewriter::rewrite(
+ d_nm->mkNode(REGEXP_CONCAT, sigmaStar, a, sigma, sigmaStar));
+ Node _assc_ = d_nm->mkNode(
+ REGEXP_CONCAT,
+ std::vector<Node>{sigmaStar, a, sigma, sigma, c, sigmaStar});
+ Node _csa_ = d_nm->mkNode(REGEXP_CONCAT, sigmaStar, c, sigma, a, sigmaStar);
+ Node _c_a_ =
+ d_nm->mkNode(REGEXP_CONCAT, sigmaStar, c, sigmaStar, a, sigmaStar);
+ Node _s_s_ = Rewriter::rewrite(d_nm->mkNode(
+ REGEXP_CONCAT, sigmaStar, sigma, sigmaStar, sigma, sigmaStar));
+ Node _a_abc_ = Rewriter::rewrite(
+ d_nm->mkNode(REGEXP_CONCAT, sigmaStar, a, sigmaStar, abc, sigmaStar));
+
+ includes(_asc_, _abc_);
+ doesNotInclude(_abc_, _asc_);
+ doesNotInclude(_csa_, _abc_);
+ doesNotInclude(_assc_, _asc_);
+ doesNotInclude(_asc_, _assc_);
+ includes(_sc_, abc);
+ includes(_sc_, _abc_);
+ includes(_sc_, _asc_);
+ includes(_sc_, _assc_);
+ doesNotInclude(_sc_, _csa_);
+ includes(_as_, abc);
+ includes(_as_, _abc_);
+ includes(_as_, _asc_);
+ includes(_as_, _assc_);
+ doesNotInclude(_sc_, _csa_);
+ includes(_s_s_, _c_a_);
+ doesNotInclude(_c_a_, _s_s_);
+ includes(_abc_, _a_abc_);
+ doesNotInclude(_a_abc_, _abc_);
+ }
+
+ private:
+ ExprManager* d_em;
+ SmtEngine* d_smt;
+ SmtScope* d_scope;
+ RegExpOpr* d_regExpOpr;
+
+ NodeManager* d_nm;
+};