From: Andrew Reynolds Date: Wed, 23 Oct 2019 23:15:28 +0000 (-0500) Subject: Fixes for SyGuS + regular expressions (#3313) X-Git-Tag: cvc5-1.0.0~3874 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=64cef995a521ac7211b9e3ed95c85deb186ff352;p=cvc5.git Fixes for SyGuS + regular expressions (#3313) This commit fixes numerous issues involving the combination of SyGuS and regular expressions. Combining SyGuS and regular expressions may involve constructing regular expressions that are neither variables nor builtin regular expression operators. The code was not robust for this case, either throwing spurious assertion failures or having incorrect behavior. --- diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index d2505f4f4..b20e2f3ad 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -48,6 +48,7 @@ RegExpOpr::RegExpOpr() RegExpOpr::~RegExpOpr() {} bool RegExpOpr::checkConstRegExp( Node r ) { + Assert(r.getType().isRegExp()); Trace("strings-regexp-cstre") << "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl; RegExpConstType rct = getRegExpConstType(r); @@ -56,6 +57,7 @@ bool RegExpOpr::checkConstRegExp( Node r ) { RegExpConstType RegExpOpr::getRegExpConstType(Node r) { + Assert(r.getType().isRegExp()); std::unordered_map::iterator it; std::vector visit; TNode cur; @@ -79,11 +81,25 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r) { d_constCache[cur] = RE_C_CONSTANT; } + else if (!isRegExpKind(ck)) + { + // non-regular expression applications, e.g. function applications + // with regular expression return type are treated as variables. + d_constCache[cur] = RE_C_VARIABLE; + } else { d_constCache[cur] = RE_C_UNKNOWN; visit.push_back(cur); - visit.insert(visit.end(), cur.begin(), cur.end()); + if (ck == REGEXP_LOOP) + { + // only add the first child of loop + visit.push_back(cur[0]); + } + else + { + visit.insert(visit.end(), cur.begin(), cur.end()); + } } } else if (it->second == RE_C_UNKNOWN) @@ -105,6 +121,14 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r) return d_constCache[r]; } +bool RegExpOpr::isRegExpKind(Kind k) +{ + return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP + || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER + || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT + || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV; +} + // 0-unknown, 1-yes, 2-no int RegExpOpr::delta( Node r, Node &exp ) { Trace("regexp-delta") << "RegExp-Delta starts with /" << mkString( r ) << "/" << std::endl; @@ -113,7 +137,7 @@ int RegExpOpr::delta( Node r, Node &exp ) { ret = d_delta_cache[r].first; exp = d_delta_cache[r].second; } else { - int k = r.getKind(); + Kind k = r.getKind(); switch( k ) { case kind::REGEXP_EMPTY: { ret = 2; @@ -241,8 +265,8 @@ int RegExpOpr::delta( Node r, Node &exp ) { break; } default: { - //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl; - Unreachable(); + Assert(!isRegExpKind(k)); + break; } } if(!exp.isNull()) { @@ -481,8 +505,9 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { break; } default: { - //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; - Unreachable(); + Assert(!isRegExpKind(r.getKind())); + return 0; + break; } } if(retNode != d_emptyRegexp) { @@ -515,7 +540,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { retNode = d_emptyRegexp; } } else { - int k = r.getKind(); + Kind k = r.getKind(); switch( k ) { case kind::REGEXP_EMPTY: { retNode = d_emptyRegexp; @@ -657,6 +682,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { default: { Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; Unreachable(); + break; } } if(retNode != d_emptyRegexp) { @@ -680,19 +706,11 @@ void RegExpOpr::firstChars(Node r, std::set &pcset, SetNodes &pvset) // cset is code points std::set cset; SetNodes vset; - int k = r.getKind(); + Kind k = r.getKind(); switch( k ) { case kind::REGEXP_EMPTY: { break; } - case kind::REGEXP_SIGMA: { - Assert(d_lastchar < std::numeric_limits::max()); - for (unsigned i = 0; i <= d_lastchar; i++) - { - cset.insert(i); - } - break; - } case kind::REGEXP_RANGE: { unsigned a = r[0].getConst().front(); a = String::convertUnsignedIntToCode(a); @@ -767,9 +785,20 @@ void RegExpOpr::firstChars(Node r, std::set &pcset, SetNodes &pvset) firstChars(r[0], cset, vset); break; } + case kind::REGEXP_SIGMA: default: { - Trace("regexp-error") << "Unsupported term: " << r << " in firstChars." << std::endl; - Unreachable(); + // we do not expect to call this function on regular expressions that + // aren't a standard regular expression kind. However, if we do, then + // the following code is conservative and says that the current + // regular expression can begin with any character. + Assert(k == REGEXP_SIGMA); + // can start with any character + Assert(d_lastchar < std::numeric_limits::max()); + for (unsigned i = 0; i <= d_lastchar; i++) + { + cset.insert(i); + } + break; } } pcset.insert(cset.begin(), cset.end()); @@ -817,7 +846,7 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes if(itr != d_simpl_neg_cache.end()) { new_nodes.push_back( itr->second ); } else { - int k = r.getKind(); + Kind k = r.getKind(); Node conc; switch( k ) { case kind::REGEXP_EMPTY: { @@ -1018,13 +1047,16 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes break; } default: { - Trace("strings-error") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl; - Assert( false, "Unsupported Term" ); + Assert(!isRegExpKind(k)); + break; } } - conc = Rewriter::rewrite( conc ); - new_nodes.push_back( conc ); - d_simpl_neg_cache[p] = conc; + if (!conc.isNull()) + { + conc = Rewriter::rewrite(conc); + new_nodes.push_back(conc); + d_simpl_neg_cache[p] = conc; + } } } void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) { @@ -1034,7 +1066,7 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes if(itr != d_simpl_cache.end()) { new_nodes.push_back( itr->second ); } else { - int k = r.getKind(); + Kind k = r.getKind(); Node conc; switch( k ) { case kind::REGEXP_EMPTY: { @@ -1191,13 +1223,16 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes break; } default: { - Trace("strings-error") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl; - Assert( false, "Unsupported Term" ); + Assert(!isRegExpKind(k)); + break; } } - conc = Rewriter::rewrite( conc ); - new_nodes.push_back( conc ); - d_simpl_cache[p] = conc; + if (!conc.isNull()) + { + conc = Rewriter::rewrite(conc); + new_nodes.push_back(conc); + d_simpl_cache[p] = conc; + } } } @@ -1644,9 +1679,13 @@ std::string RegExpOpr::mkString( Node r ) { break; } default: - Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl; - //Assert( false ); - //return Node::null(); + { + std::stringstream ss; + ss << r; + retStr = ss.str(); + Assert(!isRegExpKind(r.getKind())); + break; + } } } diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index 117449d35..c7464760d 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -119,6 +119,8 @@ class RegExpOpr { bool checkConstRegExp( Node r ); /** get the constant type for regular expression r */ RegExpConstType getRegExpConstType(Node r); + /** is k a native operator whose return type is a regular expression? */ + static bool isRegExpKind(Kind k); void simplify(Node t, std::vector< Node > &new_nodes, bool polarity); int delta( Node r, Node &exp ); int derivativeS( Node r, CVC4::String c, Node &retNode ); diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp index 0e44c9461..11471c09f 100644 --- a/src/theory/strings/regexp_solver.cpp +++ b/src/theory/strings/regexp_solver.cpp @@ -227,29 +227,37 @@ void RegExpSolver::check(const std::map >& mems) << std::endl; // if so, do simple unrolling std::vector nvec; - if (nvec.empty()) + Trace("strings-regexp") << "Simplify on " << atom << std::endl; + d_regexp_opr.simplify(atom, nvec, polarity); + Trace("strings-regexp") << "...finished" << std::endl; + // if simplifying successfully generated a lemma + if (!nvec.empty()) { - d_regexp_opr.simplify(atom, nvec, polarity); - } - std::vector exp_n; - exp_n.push_back(assertion); - Node conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec); - d_im.sendInference(rnfexp, exp_n, conc, "REGEXP_Unfold"); - addedLemma = true; - if (changed) - { - cprocessed.push_back(assertion); + std::vector exp_n; + exp_n.push_back(assertion); + Node conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec); + d_im.sendInference(rnfexp, exp_n, conc, "REGEXP_Unfold"); + addedLemma = true; + if (changed) + { + cprocessed.push_back(assertion); + } + else + { + processed.push_back(assertion); + } + if (e == 0) + { + // Remember that we have unfolded a membership for x + // notice that we only do this here, after we have definitely + // added a lemma. + repUnfold.insert(rep); + } } else { - processed.push_back(assertion); - } - if (e == 0) - { - // Remember that we have unfolded a membership for x - // notice that we only do this here, after we have definitely - // added a lemma. - repUnfold.insert(rep); + // otherwise we are incomplete + d_parent.getOutputChannel().setIncomplete(); } } if (d_state.isInConflict()) @@ -387,7 +395,7 @@ bool RegExpSolver::checkEqcIntersect(const std::vector& mems) Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP); continue; } - RegExpConstType rct = d_regexp_opr.getRegExpConstType(m); + RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]); if (rct == RE_C_VARIABLE || (options::stringRegExpInterMode() == RE_INTER_CONSTANT && rct != RE_C_CONRETE_CONSTANT)) @@ -629,7 +637,7 @@ Node RegExpSolver::getNormalSymRegExp(Node r, std::vector& nf_exp) { Trace("strings-error") << "Unsupported term: " << r << " in normalization SymRegExp." << std::endl; - Assert(false); + Assert(!RegExpOpr::isRegExpKind(r.getKind())); } } return ret; diff --git a/src/theory/strings/regexp_solver.h b/src/theory/strings/regexp_solver.h index 0b84ebc79..a43ea516a 100644 --- a/src/theory/strings/regexp_solver.h +++ b/src/theory/strings/regexp_solver.h @@ -108,6 +108,16 @@ class RegExpSolver InferenceManager& d_im; // check membership constraints Node mkAnd(Node c1, Node c2); + /** + * Check partial derivative + * + * Returns false if a lemma pertaining to checking the partial derivative + * of x in r was added. In this case, addedLemma is updated to true. + * + * The argument atom is the assertion that explains x in r, which is the + * normalized form of atom that may be modified using a substitution whose + * explanation is nf_exp. + */ bool checkPDerivative( Node x, Node r, Node atom, bool& addedLemma, std::vector& nf_exp); Node getMembership(Node n, bool isPos, unsigned i); diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index d3ec6d35c..adf74f0cc 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -23,6 +23,7 @@ #include "options/strings_options.h" #include "smt/logic_exception.h" #include "theory/arith/arith_msum.h" +#include "theory/strings/regexp_operation.h" #include "theory/strings/theory_strings_utils.h" #include "theory/theory.h" #include "util/integer.h" @@ -1186,7 +1187,7 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i Assert( index_start <= s.size() ); Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at " << index_start << std::endl; Assert(!r.isVar()); - int k = r.getKind(); + Kind k = r.getKind(); switch( k ) { case kind::STRING_TO_REGEXP: { CVC4::String s2 = s.substr( index_start, s.size() - index_start ); @@ -1338,8 +1339,7 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i } } default: { - Trace("strings-error") << "Unsupported term: " << r << " in testConstStringInRegExp." << std::endl; - Unreachable(); + Assert(!RegExpOpr::isRegExpKind(k)); return false; } } diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 876751b02..8fc4b6410 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1728,6 +1728,7 @@ set(regress_1_tests regress1/sygus/qe.sy regress1/sygus/qf_abv.smt2 regress1/sygus/real-grammar.sy + regress1/sygus/re-concat.sy regress1/sygus/repair-const-rl.sy regress1/sygus/simple-regexp.sy regress1/sygus/stopwatch-bt.sy diff --git a/test/regress/regress1/sygus/re-concat.sy b/test/regress/regress1/sygus/re-concat.sy new file mode 100644 index 000000000..3449ed505 --- /dev/null +++ b/test/regress/regress1/sygus/re-concat.sy @@ -0,0 +1,13 @@ +; EXPECT: unsat +; COMMAND-LINE: --sygus-out=status +(set-logic SLIA) +(synth-fun f () RegLan ( + (Start RegLan ( + (str.to.re Tokens) + (re.++ Start Start))) + (Tokens String ("A" "B")) + )) + +(constraint (str.in.re "AB" f)) + +(check-synth)