Fixes #3408 .
Adds basic rewriter, parsing, type checking and implements the required cases in regexp_operation.cpp. It also adds some missing documentation in regexp_operation.h
{REGEXP_LOOP, CVC4::Kind::REGEXP_LOOP},
{REGEXP_EMPTY, CVC4::Kind::REGEXP_EMPTY},
{REGEXP_SIGMA, CVC4::Kind::REGEXP_SIGMA},
+ {REGEXP_COMPLEMENT, CVC4::Kind::REGEXP_COMPLEMENT},
/* Quantifiers --------------------------------------------------------- */
{FORALL, CVC4::Kind::FORALL},
{EXISTS, CVC4::Kind::EXISTS},
{CVC4::Kind::REGEXP_LOOP, REGEXP_LOOP},
{CVC4::Kind::REGEXP_EMPTY, REGEXP_EMPTY},
{CVC4::Kind::REGEXP_SIGMA, REGEXP_SIGMA},
+ {CVC4::Kind::REGEXP_COMPLEMENT, REGEXP_COMPLEMENT},
/* Quantifiers ----------------------------------------------------- */
{CVC4::Kind::FORALL, FORALL},
{CVC4::Kind::EXISTS, EXISTS},
* mkTerm(Kind kind)
*/
REGEXP_SIGMA,
+ /**
+ * Regexp complement.
+ * Parameters: 1
+ * -[1]: Term of sort RegExp
+ * Create with:
+ * mkTerm(Kind kind, Term child1)
+ */
+ REGEXP_COMPLEMENT,
#if 0
/* regexp rv (internal use only) */
REGEXP_RV,
REGEXP_LOOP_TOK = 'RE_LOOP';
REGEXP_EMPTY_TOK = 'RE_EMPTY';
REGEXP_SIGMA_TOK = 'RE_SIGMA';
+ REGEXP_COMPLEMENT_TOK = 'RE_COMPLEMENT';
SETS_CARD_TOK = 'CARD';
{ f = MK_EXPR(CVC4::kind::REGEXP_RANGE, f, f2); }
| REGEXP_LOOP_TOK LPAREN formula[f] COMMA formula[f2] COMMA formula[f3] RPAREN
{ f = MK_EXPR(CVC4::kind::REGEXP_LOOP, f, f2, f3); }
+ | REGEXP_COMPLEMENT_TOK LPAREN formula[f] RPAREN
+ { f = MK_EXPR(CVC4::kind::REGEXP_COMPLEMENT, f); }
| REGEXP_EMPTY_TOK
{ f = MK_EXPR(CVC4::kind::REGEXP_EMPTY, std::vector<Expr>()); }
| REGEXP_SIGMA_TOK
addOperator(kind::REGEXP_OPT, "re.opt");
addOperator(kind::REGEXP_RANGE, "re.range");
addOperator(kind::REGEXP_LOOP, "re.loop");
+ addOperator(kind::REGEXP_COMPLEMENT, "re.comp");
addOperator(kind::STRING_LT, "str.<");
addOperator(kind::STRING_LEQ, "str.<=");
}
case kind::REGEXP_OPT:
case kind::REGEXP_RANGE:
case kind::REGEXP_LOOP:
+ case kind::REGEXP_COMPLEMENT:
case kind::REGEXP_EMPTY:
case kind::REGEXP_SIGMA: out << smtKindString(k, d_variant) << " "; break;
case kind::REGEXP_OPT: return "re.opt";
case kind::REGEXP_RANGE: return "re.range";
case kind::REGEXP_LOOP: return "re.loop";
+ case kind::REGEXP_COMPLEMENT: return "re.comp";
//sep theory
case kind::SEP_STAR: return "sep";
operator REGEXP_OPT 1 "regexp ?"
operator REGEXP_RANGE 2 "regexp range"
operator REGEXP_LOOP 2:3 "regexp loop"
+operator REGEXP_COMPLEMENT 1 "regexp complement"
operator REGEXP_EMPTY 0 "regexp empty"
operator REGEXP_SIGMA 0 "regexp all characters"
typerule REGEXP_OPT "SimpleTypeRule<RRegExp, ARegExp>"
typerule REGEXP_RANGE ::CVC4::theory::strings::RegExpRangeTypeRule
typerule REGEXP_LOOP "SimpleTypeRule<RRegExp, ARegExp, AInteger, AOptional<AInteger>>"
+typerule REGEXP_COMPLEMENT "SimpleTypeRule<RRegExp, ARegExp>"
typerule STRING_TO_REGEXP "SimpleTypeRule<RRegExp, AString>"
visit.pop_back();
it = d_constCache.find(cur);
+ Kind ck = cur.getKind();
if (it == d_constCache.end())
{
- Kind ck = cur.getKind();
if (ck == STRING_TO_REGEXP)
{
Node tmp = Rewriter::rewrite(cur[0]);
}
else if (it->second == RE_C_UNKNOWN)
{
- RegExpConstType ret = RE_C_CONRETE_CONSTANT;
+ RegExpConstType ret = ck == REGEXP_COMPLEMENT ? RE_C_CONSTANT : RE_C_CONRETE_CONSTANT;
for (const Node& cn : cur)
{
it = d_constCache.find(cn);
return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
|| k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
|| k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
- || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV;
+ || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
+ || k == REGEXP_COMPLEMENT;
}
// 0-unknown, 1-yes, 2-no
}
break;
}
+ case kind::REGEXP_COMPLEMENT:
+ {
+ int tmp = delta(r[0], exp);
+ // flip the result if known
+ tmp = tmp == 0 ? 0 : (3 - tmp);
+ exp = exp.isNull() ? exp : exp.negate();
+ break;
+ }
default: {
Assert(!isRegExpKind(k));
break;
}
break;
}
+ case kind::REGEXP_COMPLEMENT:
+ {
+ // don't know result
+ return 0;
+ break;
+ }
default: {
Assert(!isRegExpKind(r.getKind()));
return 0;
//Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl;
break;
}
+ case kind::REGEXP_COMPLEMENT:
default: {
Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
Unreachable();
break;
}
case kind::REGEXP_SIGMA:
+ case kind::REGEXP_COMPLEMENT:
default: {
// we do not expect to call this function on regular expressions that
// aren't a standard regular expression kind. However, if we do, then
// the following code is conservative and says that the current
// regular expression can begin with any character.
- Assert(k == REGEXP_SIGMA);
+ Assert(isRegExpKind(k));
// can start with any character
Assert(d_lastchar < std::numeric_limits<unsigned>::max());
for (unsigned i = 0; i <= d_lastchar; i++)
}
break;
}
+ case kind::REGEXP_COMPLEMENT:
+ {
+ // ~( s in complement(R) ) ---> s in R
+ conc = nm->mkNode(STRING_IN_REGEXP, s, r[0]);
+ break;
+ }
default: {
Assert(!isRegExpKind(k));
break;
}
break;
}
+ case kind::REGEXP_COMPLEMENT:
+ {
+ // s in complement(R) ---> ~( s in R )
+ conc = nm->mkNode(STRING_IN_REGEXP, s, r[0]).negate();
+ break;
+ }
default: {
Assert(!isRegExpKind(k));
break;
if(n == d_emptyRegexp) {
r1 = d_emptyRegexp;
r2 = d_emptyRegexp;
+ return;
} else if(n == d_emptySingleton) {
r1 = d_emptySingleton;
r2 = d_emptySingleton;
- } else if(n.getKind() == kind::REGEXP_RV) {
+ }
+ Kind nk = n.getKind();
+ if (nk == REGEXP_RV)
+ {
Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
<< "Exceeded UINT32_MAX in RegExpOpr::convert2";
unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
} else {
r2 = n;
}
- } else if(n.getKind() == kind::REGEXP_CONCAT) {
+ }
+ else if (nk == REGEXP_CONCAT)
+ {
bool flag = true;
std::vector<Node> vr1, vr2;
for( unsigned i=0; i<n.getNumChildren(); i++ ) {
r1 = d_emptySingleton;
r2 = n;
}
- } else if(n.getKind() == kind::REGEXP_UNION) {
+ }
+ else if (nk == REGEXP_UNION)
+ {
std::vector<Node> vr1, vr2;
for( unsigned i=0; i<n.getNumChildren(); i++ ) {
Node t1, t2;
}
r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1);
r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2);
- } else if(n.getKind() == kind::STRING_TO_REGEXP || n.getKind() == kind::REGEXP_SIGMA || n.getKind() == kind::REGEXP_RANGE) {
- r1 = d_emptySingleton;
- r2 = n;
- } else if(n.getKind() == kind::REGEXP_LOOP) {
- //TODO:LOOP
+ }
+ else if (nk == STRING_TO_REGEXP || nk == REGEXP_SIGMA || nk == REGEXP_RANGE
+ || nk == REGEXP_COMPLEMENT || nk == REGEXP_LOOP)
+ {
+ // this leaves n unchanged
r1 = d_emptySingleton;
r2 = n;
- //Unreachable();
- } else {
+ }
+ else
+ {
//is it possible?
Unreachable();
}
case REGEXP_CONCAT:
case REGEXP_UNION:
case REGEXP_STAR:
+ case REGEXP_COMPLEMENT:
{
NodeBuilder<> nb(rk);
for (const Node& rc : r)
retStr += ">";
break;
}
+ case REGEXP_COMPLEMENT:
+ {
+ retStr += "^(";
+ retStr += mkString(r[0]);
+ retStr += ")";
+ break;
+ }
default:
{
std::stringstream ss;
*/
enum RegExpConstType
{
- // the regular expression doesn't contain variables or re.allchar or re.range
+ // the regular expression doesn't contain variables or re.comp,
+ // re.allchar or re.range (call these three operators "non-concrete
+ // operators"). Notice that re.comp is a non-concrete operator
+ // since it can be seen as indirectly defined in terms of re.allchar.
RE_C_CONRETE_CONSTANT,
// the regular expression doesn't contain variables, but may contain
- // re.allchar or re.range
+ // re.comp, re.allchar or re.range
RE_C_CONSTANT,
// the regular expression may contain variables
RE_C_VARIABLE,
/** is k a native operator whose return type is a regular expression? */
static bool isRegExpKind(Kind k);
void simplify(Node t, std::vector< Node > &new_nodes, bool polarity);
+ /**
+ * This method returns 1 if the empty string is in r, 2 if the empty string
+ * is not in r, or 0 if it is unknown whether the empty string is in r.
+ * TODO (project #2): refactor the return value of this function.
+ *
+ * If this method returns 0, then exp is updated to an explanation that
+ * would imply that the empty string is in r.
+ *
+ * For example,
+ * - delta( (re.inter (str.to.re x) (re.* "A")) ) returns 0 and sets exp to
+ * x = "",
+ * - delta( (re.++ (str.to.re "A") R) ) returns 2,
+ * - delta( (re.union (re.* "A") R) ) returns 1.
+ */
int delta( Node r, Node &exp );
int derivativeS( Node r, CVC4::String c, Node &retNode );
Node derivativeSingle( Node r, CVC4::String c );
}
}
}
+ case REGEXP_COMPLEMENT:
+ {
+ return !testConstStringInRegExp(s, index_start, r[0]);
+ break;
+ }
default: {
Assert(!RegExpOpr::isRegExpKind(k));
return false;
retNode = nm->mkNode(AND,
nm->mkNode(LEQ, nm->mkNode(STRING_CODE, r[0]), xcode),
nm->mkNode(LEQ, xcode, nm->mkNode(STRING_CODE, r[1])));
- }else if(x != node[0] || r != node[1]) {
+ }
+ else if (r.getKind() == REGEXP_COMPLEMENT)
+ {
+ retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate();
+ }
+ else if (x != node[0] || r != node[1])
+ {
retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, r );
}
regress0/strings/code-eval.smt2
regress0/strings/code-perf.smt2
regress0/strings/code-sat-neg-one.smt2
+ regress0/strings/complement-simple.smt2
regress0/strings/escchar.smt2
regress0/strings/escchar_25.smt2
regress0/strings/hconst-092618.smt2
regress1/strings/cmu-inc-nlpp-071516.smt2
regress1/strings/cmu-substr-rw.smt2
regress1/strings/code-sequence.smt2
+ regress1/strings/complement-test.smt2
regress1/strings/crash-1019.smt2
regress1/strings/csp-prefix-exp-bug.smt2
regress1/strings/double-replace.smt2
--- /dev/null
+(set-logic SLIA)
+(set-info :status sat)
+(declare-fun x () String)
+(assert (str.in.re x (re.comp (str.to.re "ABC"))))
+(check-sat)
--- /dev/null
+(set-logic SLIA)
+(set-info :status sat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+(assert (= x (str.++ "AB" y)))
+(assert (or (= y "C") (= y (str.++ "D" z)) (= (str.len y) 10)))
+(assert (str.in.re x
+ (re.inter
+ (re.comp (str.to.re "AB"))
+ (re.comp (re.++ (str.to.re "AB") (re.range "A" "E") (re.* re.allchar))))))
+(check-sat)