From cdefee8664d3e99a4cd97c4affbc639937187d50 Mon Sep 17 00:00:00 2001 From: Tianyi Liang Date: Fri, 28 Feb 2014 11:05:09 -0600 Subject: [PATCH] a new regular expression engine for solving both positive and negative membership constraints --- src/parser/smt2/Smt2.g | 6 +- src/printer/smt2/smt2_printer.cpp | 4 +- src/smt/smt_engine.cpp | 12 +- src/theory/strings/kinds | 4 +- src/theory/strings/options | 16 +- src/theory/strings/regexp_operation.cpp | 530 +++++++++++------- src/theory/strings/regexp_operation.h | 12 +- src/theory/strings/theory_strings.cpp | 136 ++++- src/theory/strings/theory_strings.h | 4 + .../strings/theory_strings_preprocess.cpp | 140 +++-- .../strings/theory_strings_preprocess.h | 2 +- .../strings/theory_strings_rewriter.cpp | 132 ++--- .../strings/theory_strings_type_rules.h | 2 +- test/regress/regress0/strings/Makefile.am | 3 +- test/regress/regress0/strings/fmf001.smt2 | 1 + test/regress/regress0/strings/fmf002.smt2 | 1 + test/regress/regress0/strings/loop007.smt2 | 1 + test/regress/regress0/strings/loop008.smt2 | 1 + test/regress/regress0/strings/loop009.smt2 | 1 + test/regress/regress0/strings/regexp001.smt2 | 1 + test/regress/regress0/strings/regexp002.smt2 | 1 + 21 files changed, 624 insertions(+), 386 deletions(-) diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g index 9b598e113..cec6255a4 100644 --- a/src/parser/smt2/Smt2.g +++ b/src/parser/smt2/Smt2.g @@ -1299,7 +1299,7 @@ builtinOp[CVC4::Kind& kind] | STRINRE_TOK { $kind = CVC4::kind::STRING_IN_REGEXP; } | STRTORE_TOK { $kind = CVC4::kind::STRING_TO_REGEXP; } | RECON_TOK { $kind = CVC4::kind::REGEXP_CONCAT; } - | REOR_TOK { $kind = CVC4::kind::REGEXP_OR; } + | REUNION_TOK { $kind = CVC4::kind::REGEXP_UNION; } | REINTER_TOK { $kind = CVC4::kind::REGEXP_INTER; } | RESTAR_TOK { $kind = CVC4::kind::REGEXP_STAR; } | REPLUS_TOK { $kind = CVC4::kind::REGEXP_PLUS; } @@ -1699,8 +1699,8 @@ STRSTOI_TOK : 'str.to.int' ; STRINRE_TOK : 'str.in.re'; STRTORE_TOK : 'str.to.re'; RECON_TOK : 're.++'; -REOR_TOK : 're.or'; -REINTER_TOK : 're.itr'; +REUNION_TOK : 're.union'; +REINTER_TOK : 're.inter'; RESTAR_TOK : 're.*'; REPLUS_TOK : 're.+'; REOPT_TOK : 're.opt'; diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp index a2a925d13..c182906de 100644 --- a/src/printer/smt2/smt2_printer.cpp +++ b/src/printer/smt2/smt2_printer.cpp @@ -340,8 +340,8 @@ void Smt2Printer::toStream(std::ostream& out, TNode n, case kind::STRING_STOI: out << "str.to.int "; break; case kind::STRING_TO_REGEXP: out << "str.to.re "; break; case kind::REGEXP_CONCAT: out << "re.++ "; break; - case kind::REGEXP_OR: out << "re.or "; break; - case kind::REGEXP_INTER: out << "re.itr "; break; + case kind::REGEXP_UNION: out << "re.union "; break; + case kind::REGEXP_INTER: out << "re.inter "; break; case kind::REGEXP_STAR: out << "re.* "; break; case kind::REGEXP_PLUS: out << "re.+ "; break; case kind::REGEXP_OPT: out << "re.opt "; break; diff --git a/src/smt/smt_engine.cpp b/src/smt/smt_engine.cpp index 9abd6e165..4719af3c0 100644 --- a/src/smt/smt_engine.cpp +++ b/src/smt/smt_engine.cpp @@ -793,6 +793,14 @@ void SmtEngine::finalOptionsAreSet() { return; } + // set strings-exp + /* + if(!d_logic.hasEverything() && d_logic.isTheoryEnabled(THEORY_STRINGS) ) { + if(! options::stringExp.wasSetByUser()) { + options::stringExp.set( true ); + Trace("smt") << "turning on strings-exp, for the theory of strings" << std::endl; + } + }*/ // for strings if(options::stringExp()) { if( !d_logic.isQuantified() ) { @@ -808,11 +816,11 @@ void SmtEngine::finalOptionsAreSet() { options::fmfBoundInt.set( true ); Trace("smt") << "turning on fmf-bound-int, for strings-exp" << std::endl; } + /* if(! options::rewriteDivk.wasSetByUser()) { options::rewriteDivk.set( true ); Trace("smt") << "turning on rewrite-divk, for strings-exp" << std::endl; - } - + }*/ /* if(! options::stringFMF.wasSetByUser()) { options::stringFMF.set( true ); diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index f8f8b9555..2c9ba4b3f 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -71,7 +71,7 @@ typerule CONST_REGEXP ::CVC4::theory::strings::RegExpConstantTypeRule # equal equal / less than / output operator STRING_TO_REGEXP 1 "convert string to regexp" operator REGEXP_CONCAT 2: "regexp concat" -operator REGEXP_OR 2: "regexp or" +operator REGEXP_UNION 2: "regexp union" operator REGEXP_INTER 2: "regexp intersection" operator REGEXP_STAR 1 "regexp *" operator REGEXP_PLUS 1 "regexp +" @@ -97,7 +97,7 @@ constant REGEXP_SIGMA \ # "a regexp contains all strings" typerule REGEXP_CONCAT ::CVC4::theory::strings::RegExpConcatTypeRule -typerule REGEXP_OR ::CVC4::theory::strings::RegExpOrTypeRule +typerule REGEXP_UNION ::CVC4::theory::strings::RegExpUnionTypeRule typerule REGEXP_INTER ::CVC4::theory::strings::RegExpInterTypeRule typerule REGEXP_STAR ::CVC4::theory::strings::RegExpStarTypeRule typerule REGEXP_PLUS ::CVC4::theory::strings::RegExpPlusTypeRule diff --git a/src/theory/strings/options b/src/theory/strings/options index 139eca6ac..9bc9e2582 100644 --- a/src/theory/strings/options +++ b/src/theory/strings/options @@ -5,19 +5,19 @@ module STRINGS "theory/strings/options.h" Strings theory -expert-option stringCharCardinality strings-alphabet-card --strings-alphabet-card=N int16_t :default 256 :predicate CVC4::smt::beforeSearch :predicate-include "smt/smt_engine.h" - the cardinality of the characters used by the theory of strings, default 256 +option stringExp strings-exp --strings-exp bool :default false :read-write + experimental features in the theory of strings -option stringRegExpUnrollDepth strings-regexp-depth --strings-regexp-depth=N int16_t :default 10 :read-write - the depth of unrolloing regular expression used by the theory of strings, default 10 +option stringLB strings-lb --strings-lb=N unsigned :default 0 :predicate less_equal(2) :predicate-include "smt/smt_engine.h" + the strategy of LB rule application: 0-lazy, 1-eager, 2-no option stringFMF strings-fmf --strings-fmf bool :default false :read-write the finite model finding used by the theory of strings -option stringLB strings-lb --strings-lb=N unsigned :default 0 :predicate less_equal(2) :predicate-include "smt/smt_engine.h" - the strategy of LB rule application: 0-lazy, 1-eager, 2-no +expert-option stringCharCardinality strings-alphabet-card --strings-alphabet-card=N int16_t :default 256 :predicate CVC4::smt::beforeSearch :predicate-include "smt/smt_engine.h" + the cardinality of the characters used by the theory of strings, default 256 -option stringExp strings-exp --strings-exp bool :default false :predicate CVC4::smt::beforeSearch :predicate-include "smt/smt_engine.h" - experimental features in the theory of strings +option stringRegExpUnrollDepth strings-regexp-depth --strings-regexp-depth=N int16_t :default 10 :read-write + the depth of unrolloing regular expression used by the theory of strings, default 10 (deprecated) endmodule diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index e608a0533..6869e45f3 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -34,10 +34,16 @@ namespace strings { RegExpOpr::RegExpOpr() { d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); + d_true = NodeManager::currentNM()->mkConst( true ); + d_false = NodeManager::currentNM()->mkConst( false ); + d_emptyRegexp = NodeManager::currentNM()->mkConst( kind::REGEXP_EMPTY ); + d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) ); + d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) ); // All Charactors = all printable ones 32-126 d_char_start = 'a';//' '; d_char_end = 'c';//'~'; d_sigma = mkAllExceptOne( '\0' ); + //d_sigma = NodeManager::currentNM()->mkConst( kind::REGEXP_SIGMA ); d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma ); } @@ -79,8 +85,15 @@ int RegExpOpr::delta( Node r ) { } else { int k = r.getKind(); switch( k ) { - case kind::STRING_TO_REGEXP: - { + case kind::REGEXP_EMPTY: { + ret = 2; + break; + } + case kind::REGEXP_SIGMA: { + ret = 2; + break; + } + case kind::STRING_TO_REGEXP: { if(r[0].isConst()) { if(r[0] == d_emptyString) { ret = 1; @@ -90,10 +103,9 @@ int RegExpOpr::delta( Node r ) { } else { ret = 0; } - } break; - case kind::REGEXP_CONCAT: - { + } + case kind::REGEXP_CONCAT: { bool flag = false; for(unsigned i=0; imkNode( kind::REGEXP_OR, vec_nodes ) ); - } + retNode = vec_nodes.size() == 0 ? d_emptyRegexp : + ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); break; - case kind::REGEXP_INTER: - { + } + case kind::REGEXP_INTER: { bool flag = true; bool flag_sg = false; std::vector< Node > vec_nodes; for(unsigned i=0; imkNode( kind::REGEXP_INTER, vec_nodes ) ); } } else { - retNode = Node::null(); + retNode = d_emptyRegexp; } - } break; - case kind::REGEXP_STAR: - { - Node dc = derivativeSingle(r[0], c); - if(!dc.isNull()) { - retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ); - } else { - retNode = Node::null(); - } } - break; - case kind::REGEXP_PLUS: - { + case kind::REGEXP_STAR: { Node dc = derivativeSingle(r[0], c); - if(!dc.isNull()) { - retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ); + if(dc != d_emptyRegexp) { + retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ); } else { - retNode = Node::null(); + retNode = d_emptyRegexp; } - } - break; - case kind::REGEXP_OPT: - { - retNode = derivativeSingle(r[0], c); - } break; - case kind::REGEXP_RANGE: - { - char ch = c.getFirstChar(); - if (ch >= r[0].getConst< CVC4::String >().getFirstChar() && ch <= r[1].getConst< CVC4::String >().getFirstChar() ) { - retNode = d_emptyString; - } else { - retNode = Node::null(); - } } - break; - default: + default: { //TODO: special sym: sigma, none, all Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; - //AlwaysAssert( false ); + Assert( false, "Unsupported Term" ); //return Node::null(); + } } - if(!retNode.isNull()) { + if(retNode != d_emptyRegexp) { retNode = Rewriter::rewrite( retNode ); } d_dv_cache[dv] = retNode; @@ -368,7 +353,7 @@ bool RegExpOpr::guessLength( Node r, int &co ) { return true; } break; - case kind::REGEXP_OR: + case kind::REGEXP_UNION: { int g_co; for(unsigned i=0; i &vec_chars ) return true; } break; - case kind::REGEXP_OR: + case kind::REGEXP_UNION: { bool flag = false; for(unsigned i=0; imkNode( kind::REGEXP_OR, vec_nodes ); + return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ); } Node RegExpOpr::complement( Node r ) { @@ -590,7 +575,7 @@ Node RegExpOpr::complement( Node r ) { } Node tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, d_sigma, d_sigma_star ); vec_nodes.push_back( tmp ); - retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ); + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ); } } else { Trace("strings-error") << "Unsupported string variable: " << r[0] << " in complement of RegExp." << std::endl; @@ -613,10 +598,10 @@ Node RegExpOpr::complement( Node r ) { } vec_nodes.push_back( tmp ); } - retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ); + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ); } break; - case kind::REGEXP_OR: + case kind::REGEXP_UNION: { std::vector< Node > vec_nodes; for(unsigned i=0; imkNode( kind::REGEXP_OR, vec_nodes ); + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ); } break; case kind::REGEXP_STAR: @@ -657,13 +642,163 @@ Node RegExpOpr::complement( Node r ) { } //simplify -void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes) { +void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) { + Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl; Assert(t.getKind() == kind::STRING_IN_REGEXP); Node str = Rewriter::rewrite(t[0]); Node re = Rewriter::rewrite(t[1]); - simplifyRegExp( str, re, new_nodes ); + if(polarity) { + simplifyPRegExp( str, re, new_nodes ); + } else { + simplifyNRegExp( str, re, new_nodes ); + } + Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n"; + for(unsigned i=0; i &new_nodes ) { +void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) { + std::pair < Node, Node > p(s, r); + std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p); + if(itr != d_simpl_neg_cache.end()) { + new_nodes.push_back( itr->second ); + return; + } else { + int k = r.getKind(); + Node conc; + switch( k ) { + case kind::REGEXP_EMPTY: { + conc = d_true; + break; + } + case kind::REGEXP_SIGMA: { + conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate(); + break; + } + case kind::STRING_TO_REGEXP: { + conc = s.eqNode(r[0]).negate(); + break; + } + case kind::REGEXP_CONCAT: { + //TODO: rewrite empty + Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); + Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); + Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero), + NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) ); + Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + Node s2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, s1, s2); + Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2)); + Node s1len = b1.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1)); + Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate(); + if(r[0].getKind() == kind::STRING_TO_REGEXP) { + s1r1 = s1.eqNode(r[0][0]).negate(); + } else if(r[0].getKind() == kind::REGEXP_EMPTY) { + s1r1 = d_true; + } + Node r2 = r[1]; + if(r.getNumChildren() > 2) { + std::vector< Node > nvec; + for(unsigned i=1; imkNode(kind::REGEXP_CONCAT, nvec); + } + r2 = Rewriter::rewrite(r2); + Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate(); + if(r2.getKind() == kind::STRING_TO_REGEXP) { + s2r2 = s2.eqNode(r2[0]).negate(); + } else if(r2.getKind() == kind::REGEXP_EMPTY) { + s2r2 = d_true; + } + + conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2); + conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1len, conc); + conc = NodeManager::currentNM()->mkNode(kind::EXISTS, b2v, conc); + conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc); + conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc); + break; + } + case kind::REGEXP_UNION: { + std::vector< Node > c_and; + for(unsigned i=0; imkNode(kind::STRING_IN_REGEXP, s, r[i]).negate()); + } + } + conc = c_and.size() == 0 ? d_true : + c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and); + break; + } + case kind::REGEXP_INTER: { + bool emptyflag = false; + std::vector< Node > c_or; + for(unsigned i=0; imkNode(kind::STRING_IN_REGEXP, s, r[i]).negate()); + } + } + if(emptyflag) { + conc = d_true; + } else { + conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or); + } + break; + } + case kind::REGEXP_STAR: { + if(s == d_emptyString) { + conc = d_false; + } else if(r[0].getKind() == kind::REGEXP_EMPTY) { + conc = s.eqNode(d_emptyString).negate(); + } else if(r[0].getKind() == kind::REGEXP_SIGMA) { + conc = d_false; + } else { + Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s); + Node sne = s.eqNode(d_emptyString).negate(); + Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); + Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); + Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one), + NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) ); + //internal + Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1); + Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1)); + //Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + //Node s2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + //Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, s1, s2); + //Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2)); + //Node s1len = b1.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1)); + Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate(); + Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate(); + + conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2); + //conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1len, conc); + //conc = NodeManager::currentNM()->mkNode(kind::EXISTS, b2v, conc); + conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc); + conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc); + conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc); + } + break; + } + default: { + Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl; + AlwaysAssert( false, "Unsupported Term" ); + } + } + conc = Rewriter::rewrite( conc ); + new_nodes.push_back( conc ); + d_simpl_neg_cache[p] = conc; + } +} +void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) { std::pair < Node, Node > p(s, r); std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p); if(itr != d_simpl_cache.end()) { @@ -671,34 +806,24 @@ void RegExpOpr::simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes ) return; } else { int k = r.getKind(); + Node conc; switch( k ) { - case kind::REGEXP_EMPTY: - { - Node eq = NodeManager::currentNM()->mkConst( false ); - new_nodes.push_back( eq ); - d_simpl_cache[p] = eq; - } + case kind::REGEXP_EMPTY: { + conc = d_false; break; - case kind::REGEXP_SIGMA: - { - Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) ); - Node eq = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)); - new_nodes.push_back( eq ); - d_simpl_cache[p] = eq; } + case kind::REGEXP_SIGMA: { + conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)); break; - case kind::STRING_TO_REGEXP: - { - Node eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, r[0] ); - new_nodes.push_back( eq ); - d_simpl_cache[p] = eq; } + case kind::STRING_TO_REGEXP: { + conc = s.eqNode(r[0]); break; - case kind::REGEXP_CONCAT: - { + } + case kind::REGEXP_CONCAT: { + std::vector< Node > nvec; std::vector< Node > cc; bool emptyflag = false; - Node ff = NodeManager::currentNM()->mkConst( false ); for(unsigned i=0; i &new_nodes ) emptyflag = true; break; } else { - Node sk = NodeManager::currentNM()->mkSkolem( "rcon_$$", s.getType(), "created for regular expression concat" ); - simplifyRegExp( sk, r[i], new_nodes ); - if(new_nodes.size() != 0) { - if(new_nodes[new_nodes.size() - 1] == ff) { - emptyflag = true; - break; - } - } - cc.push_back( sk ); + Node sk = NodeManager::currentNM()->mkSkolem( "rc_$$", s.getType(), "created for regular expression concat" ); + Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]); + nvec.push_back(lem); + cc.push_back(sk); } } if(emptyflag) { - new_nodes.push_back( ff ); - d_simpl_cache[p] = ff; + conc = d_false; } else { - Node cc_eq = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) ); - new_nodes.push_back( cc_eq ); - d_simpl_cache[p] = cc_eq; + Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) ); + nvec.push_back(lem); + conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec); } - } break; - case kind::REGEXP_OR: - { + } + case kind::REGEXP_UNION: { std::vector< Node > c_or; for(unsigned i=0; imkNode(kind::STRING_IN_REGEXP, s, r[i])); + } } - Node eq = NodeManager::currentNM()->mkNode( kind::OR, c_or ); - new_nodes.push_back( eq ); - d_simpl_cache[p] = eq; - } + conc = c_or.size() == 0 ? d_false : + c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or); break; - case kind::REGEXP_INTER: - { - Node nfalse = NodeManager::currentNM()->mkConst( false ); + } + case kind::REGEXP_INTER: { + std::vector< Node > c_and; + bool emptyflag = false; for(unsigned i=0; imkNode(kind::STRING_IN_REGEXP, s, r[i])); } } - } + if(emptyflag) { + conc = d_false; + } else { + conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and); + } break; - case kind::REGEXP_STAR: - { - Node eq; - if(r[0].getKind() == kind::REGEXP_EMPTY) { - eq = NodeManager::currentNM()->mkConst( false ); + } + case kind::REGEXP_STAR: { + if(s == d_emptyString) { + conc = d_true; + } else if(r[0].getKind() == kind::REGEXP_EMPTY) { + conc = s.eqNode(d_emptyString); } else if(r[0].getKind() == kind::REGEXP_SIGMA) { - eq = NodeManager::currentNM()->mkConst( true ); + conc = d_true; } else { - eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r ); + Node se = s.eqNode(d_emptyString); + Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]); + Node sk1 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" ); + Node sk2 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" ); + Node s1nz = sk1.eqNode(d_emptyString).negate(); + Node s2nz = sk2.eqNode(d_emptyString).negate(); + Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]); + Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r); + Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2)); + + conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs); + conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc); } - new_nodes.push_back( eq ); - d_simpl_cache[p] = eq; - } - break; - default: - //TODO: special sym: sigma, none, all - Trace("strings-preprocess") << "Unsupported term: " << r << " in simplifyRegExp." << std::endl; - Assert( false ); break; + } + default: { + Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl; + AlwaysAssert( false, "Unsupported Term" ); + } } + conc = Rewriter::rewrite( conc ); + new_nodes.push_back( conc ); + d_simpl_cache[p] = conc; } } @@ -790,33 +931,28 @@ std::string RegExpOpr::mkString( Node r ) { } else { int k = r.getKind(); switch( k ) { - case kind::REGEXP_EMPTY: - { + case kind::REGEXP_EMPTY: { retStr += "Empty"; - } break; - case kind::REGEXP_SIGMA: - { - retStr += "{W}"; } + case kind::REGEXP_SIGMA: { + retStr += "{W}"; break; - case kind::STRING_TO_REGEXP: - { - retStr += niceChar( r[0] ); } + case kind::STRING_TO_REGEXP: { + retStr += niceChar( r[0] ); break; - case kind::REGEXP_CONCAT: - { + } + case kind::REGEXP_CONCAT: { retStr += "("; for(unsigned i=0; i PairDvStr; private: Node d_emptyString; + Node d_true; + Node d_false; + Node d_emptyRegexp; + Node d_zero; + Node d_one; + char d_char_start; char d_char_end; Node d_sigma; Node d_sigma_star; std::map< std::pair< Node, Node >, Node > d_simpl_cache; + std::map< std::pair< Node, Node >, Node > d_simpl_neg_cache; std::map< Node, Node > d_compl_cache; std::map< Node, int > d_delta_cache; std::map< PairDvStr, Node > d_dv_cache; std::map< Node, bool > d_cstre_cache; //bool checkStarPlus( Node t ); - void simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes ); + void simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ); + void simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ); std::string niceChar( Node r ); int gcd ( int a, int b ); public: RegExpOpr(); bool checkConstRegExp( Node r ); - void simplify(Node t, std::vector< Node > &new_nodes); + void simplify(Node t, std::vector< Node > &new_nodes, bool polarity); Node mkAllExceptOne( char c ); Node complement( Node r ); int delta( Node r ); diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index ba36aa371..28ef43cf9 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -47,6 +47,7 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu d_str_pos_ctn( c ), d_str_neg_ctn( c ), d_reg_exp_mem( c ), + d_regexp_deriv_processed( c ), d_curr_cardinality( c, 0 ) { // The kinds we are treating as function application in congruence @@ -61,7 +62,8 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) ); d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); - d_true = NodeManager::currentNM()->mkConst( true ); + d_emptyRegexp = NodeManager::currentNM()->mkConst( kind::REGEXP_EMPTY ); + d_true = NodeManager::currentNM()->mkConst( true ); d_false = NodeManager::currentNM()->mkConst( false ); d_all_warning = true; @@ -1010,10 +1012,12 @@ bool TheoryStrings::processLoop(std::vector< Node > &antec, conc = NodeManager::currentNM()->mkNode( kind::AND, vec_conc );//, len_x_gt_len_y } // normal case - //set its antecedant to ant, to say when it is relevant - d_reg_exp_ant[str_in_re] = ant; - //unroll the str in re constraint once - unrollStar( str_in_re ); + if( !options::stringExp() ) { + //set its antecedant to ant, to say when it is relevant + d_reg_exp_ant[str_in_re] = ant; + //unroll the str in re constraint once + unrollStar( str_in_re ); + } sendLemma( ant, conc, "LOOP-BREAK" ); ++(d_statistics.d_loop_lemmas); @@ -2234,7 +2238,7 @@ bool TheoryStrings::unrollStar( Node atom ) { cc.push_back(unr0); } else { std::vector< Node > urc; - d_regexp_opr.simplify(unr1, urc); + d_regexp_opr.simplify(unr1, urc, true); Node unr0 = sk_s.eqNode( d_emptyString ).negate(); cc.push_back(unr0); //cc.push_back(urc1); cc.insert(cc.end(), urc.begin(), urc.end()); @@ -2270,6 +2274,84 @@ bool TheoryStrings::unrollStar( Node atom ) { } bool TheoryStrings::checkMemberships() { + bool is_unk = false; + bool addedLemma = false; + for( unsigned i=0; imkNode(kind::AND, antec, atom); + Node conc = Node::null(); + sendLemma(antec, conc, "RegExp Delta Conflict"); + addedLemma = true; + flag = false; + d_regexp_deriv_processed[atom] = true; + } + } /*else if(splitRegExp( x, r, atom )) { + addedLemma = true; flag = false; + d_regexp_deriv_processed[ atom ] = true; + }*/ + } + } else { + //TODO + if(! options::stringExp()) { + is_unk = true; + break; + } + } + if(flag) { + std::vector< Node > nvec; + d_regexp_opr.simplify(atom, nvec, polarity); + Node conc = nvec.size()==1 ? nvec[0] : + NodeManager::currentNM()->mkNode(kind::AND, nvec); + conc = Rewriter::rewrite(conc); + sendLemma( assertion, conc, "REGEXP" ); + addedLemma = true; + d_regexp_cache[assertion] = true; + } + } + if(d_conflict) { + break; + } + } + if( addedLemma ){ + doPendingLemmas(); + return true; + }else{ + if( is_unk ){ + Trace("strings-regexp") << "Strings::regexp: possibly incomplete." << std::endl; + d_out->setIncomplete(); + } + return false; + } +} +//TODO remove +bool TheoryStrings::checkMemberships2() { bool is_unk = false; bool addedLemma = false; for( unsigned i=0; imkBoundVar("bi", NodeManager::currentNM()->integerType()); + Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); Node g1 = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode( kind::GEQ, b1, d_zero ), NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode( kind::MINUS, lenx, lens ), b1 ) ) ); - Node b2 = NodeManager::currentNM()->mkBoundVar("bj", NodeManager::currentNM()->integerType()); + Node b2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, x, NodeManager::currentNM()->mkNode( kind::PLUS, b1, b2 ), d_one); Node s5 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b2, d_one); - Node s1 = NodeManager::currentNM()->mkBoundVar("s1", NodeManager::currentNM()->stringType()); - Node s3 = NodeManager::currentNM()->mkBoundVar("s3", NodeManager::currentNM()->stringType()); - Node s4 = NodeManager::currentNM()->mkBoundVar("s4", NodeManager::currentNM()->stringType()); - Node s6 = NodeManager::currentNM()->mkBoundVar("s6", NodeManager::currentNM()->stringType()); - Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b2, s1, s3, s4, s6); + //Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + //Node s3 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + //Node s4 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + //Node s6 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b2);//, s1, s3, s4, s6); std::vector< Node > vec_nodes; Node cc = NodeManager::currentNM()->mkNode( kind::GEQ, b2, d_zero ); @@ -2457,22 +2539,22 @@ bool TheoryStrings::checkNegContains() { cc = NodeManager::currentNM()->mkNode( kind::GEQ, lens, b2 ); vec_nodes.push_back(cc); - cc = x.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2, s3)); - vec_nodes.push_back(cc); - cc = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s4, s5, s6)); - vec_nodes.push_back(cc); + //cc = x.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2, s3)); + //vec_nodes.push_back(cc); + //cc = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s4, s5, s6)); + //vec_nodes.push_back(cc); cc = s2.eqNode(s5).negate(); vec_nodes.push_back(cc); - cc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1).eqNode(NodeManager::currentNM()->mkNode( kind::PLUS, b1, b2 )); - vec_nodes.push_back(cc); - cc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s4).eqNode(b2); - vec_nodes.push_back(cc); + //cc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1).eqNode(NodeManager::currentNM()->mkNode( kind::PLUS, b1, b2 )); + //vec_nodes.push_back(cc); + //cc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s4).eqNode(b2); + //vec_nodes.push_back(cc); // charAt length - cc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s2)); - vec_nodes.push_back(cc); - cc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s5)); - vec_nodes.push_back(cc); + //cc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s2)); + //vec_nodes.push_back(cc); + //cc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s5)); + //vec_nodes.push_back(cc); Node conc = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::AND, vec_nodes) ); Node xlss = NodeManager::currentNM()->mkNode( kind::GT, lens, lenx ); @@ -2553,7 +2635,7 @@ bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) { for(unsigned i=0; imkNode( kind::STRING_IN_REGEXP, left, dc ); std::vector< Node > sdc; - d_regexp_opr.simplify(conc, sdc); + d_regexp_opr.simplify(conc, sdc, true); if(sdc.size() == 1) { conc = sdc[0]; } else { diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index ea92865b2..bf8a3d894 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -111,6 +111,7 @@ public: private: // Constants Node d_emptyString; + Node d_emptyRegexp; Node d_true; Node d_false; Node d_zero; @@ -228,6 +229,7 @@ private: bool checkCardinality(); bool checkInductiveEquations(); bool checkMemberships(); + bool checkMemberships2(); bool checkContains(); bool checkPosContains(); bool checkNegContains(); @@ -299,6 +301,7 @@ private: private: // regular expression memberships NodeList d_reg_exp_mem; + std::map< Node, bool > d_regexp_cache; // antecedant for why reg exp membership must be true std::map< Node, Node > d_reg_exp_ant; std::map< Node, bool > d_reg_exp_unroll; @@ -310,6 +313,7 @@ private: std::map< Node, bool > d_membership_length; // regular expression derivative std::map< Node, bool > d_reg_exp_deriv; + NodeBoolMap d_regexp_deriv_processed; // regular expression operations RegExpOpr d_regexp_opr; diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 43d3bfe47..bb79f337b 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -32,58 +32,76 @@ StringsPreprocess::StringsPreprocess() { d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) ); } -void StringsPreprocess::simplifyRegExp( Node s, Node r, std::vector< Node > &ret, std::vector< Node > &nn ) { +void StringsPreprocess::processRegExp( Node s, Node r, std::vector< Node > &ret ) { int k = r.getKind(); switch( k ) { - case kind::STRING_TO_REGEXP: - { - Node eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, r[0] ); + case kind::REGEXP_EMPTY: { + Node eq = NodeManager::currentNM()->mkConst( false ); ret.push_back( eq ); + break; } + case kind::REGEXP_SIGMA: { + Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) ); + Node eq = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)); + ret.push_back( eq ); break; - case kind::REGEXP_CONCAT: - { + } + case kind::STRING_TO_REGEXP: { + Node eq = s.eqNode( r[0] ); + ret.push_back( eq ); + break; + } + case kind::REGEXP_CONCAT: { + bool flag = true; std::vector< Node > cc; for(unsigned i=0; imkSkolem( "recsym_$$", s.getType(), "created for regular expression concat" ); - simplifyRegExp( sk, r[i], ret, nn ); - cc.push_back( sk ); + flag = false; + break; } } - Node cc_eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, - NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, cc ) ); - nn.push_back( cc_eq ); - } + if(flag) { + Node eq = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc)); + ret.push_back(eq); + } else { + Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r ); + ret.push_back( eq ); + } break; - case kind::REGEXP_OR: - { + } + case kind::REGEXP_UNION: { std::vector< Node > c_or; for(unsigned i=0; i ntmp; + processRegExp( s, r[i], ntmp ); + Node lem = ntmp.size()==1 ? ntmp[0] : NodeManager::currentNM()->mkNode(kind::AND, ntmp); + c_or.push_back( lem ); } - Node eq = NodeManager::currentNM()->mkNode( kind::OR, c_or ); + Node eq = NodeManager::currentNM()->mkNode(kind::OR, c_or); ret.push_back( eq ); - } break; - case kind::REGEXP_INTER: + } + case kind::REGEXP_INTER: { for(unsigned i=0; imkNode( kind::STRING_IN_REGEXP, s, r ); - ret.push_back( eq ); } + case kind::REGEXP_STAR: { + if(r[0].getKind() == kind::REGEXP_SIGMA) { + ret.push_back(NodeManager::currentNM()->mkConst(true)); + } else { + Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r ); + ret.push_back( eq ); + } break; - default: - //TODO: special sym: sigma, none, all + } + default: { Trace("strings-preprocess") << "Unsupported term: " << r << " in simplifyRegExp." << std::endl; - Assert( false ); - break; + AlwaysAssert( false, "Unsupported Term" ); + } } } @@ -150,24 +168,18 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { d_cache[t] = t; retNode = t; } - } else */if( t.getKind() == kind::STRING_IN_REGEXP ) { - // t0 in t1 + } else */ + if( t.getKind() == kind::STRING_IN_REGEXP ) { Node t0 = simplify( t[0], new_nodes ); - //if(!checkStarPlus( t[1] )) { - //rewrite it - std::vector< Node > ret; - std::vector< Node > nn; - simplifyRegExp( t0, t[1], ret, nn ); - new_nodes.insert( new_nodes.end(), nn.begin(), nn.end() ); + //rewrite it + std::vector< Node > ret; + processRegExp( t0, t[1], ret ); - Node n = ret.size() == 1 ? ret[0] : NodeManager::currentNM()->mkNode( kind::AND, ret ); - d_cache[t] = (t == n) ? Node::null() : n; - retNode = n; - //} else { - // TODO - // return (t0 == t[0]) ? t : NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, t0, t[1] ); - //} + Node n = ret.size() == 1 ? ret[0] : NodeManager::currentNM()->mkNode( kind::AND, ret ); + n = Rewriter::rewrite(n); + d_cache[t] = (t == n) ? Node::null() : n; + retNode = n; } else if( t.getKind() == kind::STRING_SUBSTR_TOTAL ) { Node lenxgti = NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[0] ), @@ -246,7 +258,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { new_nodes.push_back(lem); //non-neg - Node b1 = NodeManager::currentNM()->mkBoundVar("x", NodeManager::currentNM()->integerType()); + Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); Node g1 = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode( kind::GEQ, b1, d_zero ), NodeManager::currentNM()->mkNode( kind::GT, lenp, b1 ) ) ); @@ -286,8 +298,8 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node cc3 = NodeManager::currentNM()->mkNode(kind::GEQ, ufMx, d_zero); Node cc4 = NodeManager::currentNM()->mkNode(kind::GEQ, nine, ufMx); - Node b21 = NodeManager::currentNM()->mkBoundVar("y", NodeManager::currentNM()->stringType()); - Node b22 = NodeManager::currentNM()->mkBoundVar("z", NodeManager::currentNM()->stringType()); + Node b21 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + Node b22 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b21, b22); Node c21 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, b21).eqNode( @@ -367,23 +379,14 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node cc1 = t[0].eqNode(NodeManager::currentNM()->mkConst(::CVC4::String(""))); //cc1 = NodeManager::currentNM()->mkNode(kind::AND, ufP0.eqNode(negone), cc1); //cc2 - Node b1 = NodeManager::currentNM()->mkBoundVar("x", NodeManager::currentNM()->integerType()); - Node z1 = NodeManager::currentNM()->mkBoundVar("z1", NodeManager::currentNM()->stringType()); - Node z2 = NodeManager::currentNM()->mkBoundVar("z2", NodeManager::currentNM()->stringType()); - Node z3 = NodeManager::currentNM()->mkBoundVar("z3", NodeManager::currentNM()->stringType()); - Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1, z1, z2, z3); std::vector< Node > vec_n; - Node g = NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero); - vec_n.push_back(g); - g = NodeManager::currentNM()->mkNode(kind::GT, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, t[0]), b1); - vec_n.push_back(g); - g = b1.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, z1) ); - vec_n.push_back(g); - g = one.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, z2) ); + Node z1 = NodeManager::currentNM()->mkSkolem("z1_$$", NodeManager::currentNM()->stringType()); + Node z2 = NodeManager::currentNM()->mkSkolem("z1_$$", NodeManager::currentNM()->stringType()); + Node z3 = NodeManager::currentNM()->mkSkolem("z1_$$", NodeManager::currentNM()->stringType()); + Node g = one.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, z2) ); vec_n.push_back(g); g = t[0].eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, z1, z2, z3) ); vec_n.push_back(g); - //Node z2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, t[0], one); char chtmp[2]; chtmp[1] = '\0'; for(unsigned i=0; i<=9; i++) { @@ -393,10 +396,8 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { vec_n.push_back(g); } Node cc2 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, vec_n)); - cc2 = NodeManager::currentNM()->mkNode(kind::EXISTS, b1v, cc2); - //cc2 = NodeManager::currentNM()->mkNode(kind::AND, ufP0.eqNode(negone), cc2); //cc3 - Node b2 = NodeManager::currentNM()->mkBoundVar("y", NodeManager::currentNM()->integerType()); + Node b2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b2); Node g2 = NodeManager::currentNM()->mkNode(kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b2, d_zero), @@ -404,8 +405,8 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node ufx = NodeManager::currentNM()->mkNode(kind::APPLY_UF, ufP, b2); Node ufx1 = NodeManager::currentNM()->mkNode(kind::APPLY_UF, ufP, NodeManager::currentNM()->mkNode(kind::MINUS,b2,one)); Node ufMx = NodeManager::currentNM()->mkNode(kind::APPLY_UF, ufM, b2); - Node w1 = NodeManager::currentNM()->mkBoundVar("w1", NodeManager::currentNM()->stringType()); - Node w2 = NodeManager::currentNM()->mkBoundVar("w2", NodeManager::currentNM()->stringType()); + Node w1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + Node w2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); Node b3v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, w1, w2); Node b2gtz = NodeManager::currentNM()->mkNode(kind::GT, b2, d_zero); Node c3c1 = ufx1.eqNode( NodeManager::currentNM()->mkNode(kind::PLUS, @@ -533,17 +534,12 @@ Node StringsPreprocess::decompose(Node t, std::vector< Node > & new_nodes) { unsigned num = t.getNumChildren(); if(num == 0) { return simplify(t, new_nodes); - } else if(num == 1) { - Node s = decompose(t[0], new_nodes); - if(s != t[0]) { - Node tmp = NodeManager::currentNM()->mkNode( t.getKind(), s ); - return simplify(tmp, new_nodes); - } else { - return simplify(t, new_nodes); - } } else { bool changed = false; std::vector< Node > cc; + if (t.getMetaKind() == kind::metakind::PARAMETERIZED) { + cc.push_back(t.getOperator()); + } for(unsigned i=0; i &ret, std::vector< Node > &nn ); + void processRegExp( Node s, Node r, std::vector< Node > &ret ); Node simplify( Node t, std::vector< Node > &new_nodes ); Node decompose( Node t, std::vector< Node > &new_nodes ); public: diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index c0d66ab22..b118e389e 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -28,28 +28,23 @@ Node TheoryStringsRewriter::rewriteConcatString( TNode node ) { Node retNode = node; std::vector node_vec; Node preNode = Node::null(); - bool emptyflag = false; for(unsigned int i=0; imkConst( preNode.getConst().concat( tmpNode[0].getConst() ) ); node_vec.push_back( preNode ); preNode = Node::null(); - ++j; } else { node_vec.push_back( preNode ); preNode = Node::null(); node_vec.push_back( tmpNode[0] ); - ++j; } + ++j; } for(; j().isEmptyString() ) { preNode = Node::null(); } else { @@ -75,17 +70,13 @@ Node TheoryStringsRewriter::rewriteConcatString( TNode node ) { } } } - if(emptyflag) { - retNode = NodeManager::currentNM()->mkConst( kind::REGEXP_EMPTY ); + if(preNode != Node::null()) { + node_vec.push_back( preNode ); + } + if(node_vec.size() > 1) { + retNode = NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, node_vec); } else { - if(preNode != Node::null()) { - node_vec.push_back( preNode ); - } - if(node_vec.size() > 1) { - retNode = NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, node_vec); - } else { - retNode = node_vec[0]; - } + retNode = node_vec[0]; } Trace("strings-prerewrite") << "Strings::rewriteConcatString end " << retNode << std::endl; return retNode; @@ -101,23 +92,22 @@ Node TheoryStringsRewriter::prerewriteConcatRegExp( TNode node ) { for(unsigned int i=0; imkNode( kind::STRING_CONCAT, preNode, tmpNode[0][0] ) ); node_vec.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, preNode ) ); preNode = Node::null(); - ++j; } else { node_vec.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, preNode ) ); preNode = Node::null(); node_vec.push_back( tmpNode[0] ); - ++j; } + ++j; } for(; j node_vec; - bool flag = true; - for(unsigned int i=0; imkConst( kind::REGEXP_EMPTY ) : - node_vec.size() == 1 ? node_vec[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_OR, node_vec); + node_vec.size() == 1 ? node_vec[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, node_vec); } Trace("strings-prerewrite") << "Strings::prerewriteOrRegExp end " << retNode << std::endl; return retNode; @@ -209,17 +199,15 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i Assert( index_start <= s.size() ); int k = r.getKind(); switch( k ) { - case kind::STRING_TO_REGEXP: - { + case kind::STRING_TO_REGEXP: { CVC4::String s2 = s.substr( index_start, s.size() - index_start ); if(r[0].getKind() == kind::CONST_STRING) { return ( s2 == r[0].getConst() ); } else { - Assert( false, "RegExp contains variable" ); + Assert( false, "RegExp contains variables" ); } } - case kind::REGEXP_CONCAT: - { + case kind::REGEXP_CONCAT: { if( s.size() != index_start ) { std::vector vec_k( r.getNumChildren(), -1 ); int start = 0; @@ -256,24 +244,21 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i return true; } } - case kind::REGEXP_OR: - { + case kind::REGEXP_UNION: { for(unsigned i=0; i0; --k) { + for(unsigned k=s.size() - index_start; k>0; --k) { CVC4::String t = s.substr(index_start, k); if( testConstStringInRegExp( t, 0, r[0] ) ) { if( index_start + k == s.size() || testConstStringInRegExp( s, index_start + k, r ) ) { @@ -286,25 +271,24 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i return true; } } - case kind::REGEXP_EMPTY: - { + case kind::REGEXP_EMPTY: { return false; } - case kind::REGEXP_SIGMA: - { + case kind::REGEXP_SIGMA: { if(s.size() == 1) { return true; } else { return false; } } - default: - //TODO: special sym: sigma, none, all + default: { Trace("strings-postrewrite") << "Unsupported term: " << r << " in testConstStringInRegExp." << std::endl; - Assert( false ); + AlwaysAssert( false, "Unsupported Term" ); return false; + } } } + Node TheoryStringsRewriter::rewriteMembership(TNode node) { Node retNode = node; Node x = node[0]; @@ -319,10 +303,13 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) { //test whether x in node[1] CVC4::String s = x.getConst(); retNode = NodeManager::currentNM()->mkConst( testConstStringInRegExp( s, 0, node[1] ) ); - } else { - if( x != node[0] ) { - retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, node[1] ); - } + } else if(node[1].getKind() == kind::REGEXP_SIGMA) { + Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) ); + retNode = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x)); + } else if(node[1].getKind() == kind::REGEXP_STAR && node[1][0].getKind() == kind::REGEXP_SIGMA) { + retNode = NodeManager::currentNM()->mkConst( true ); + } else if( x != node[0] ) { + retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, node[1] ); } return retNode; } @@ -383,11 +370,21 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { Node zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) ); if(node[2] == zero) { retNode = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); - } else if( node[0].isConst() && node[1].isConst() && node[2].isConst() ) { - int i = node[1].getConst().getNumerator().toUnsignedInt(); - int j = node[2].getConst().getNumerator().toUnsignedInt(); - if( i>=0 && j>=0 && node[0].getConst().size() >= (unsigned) (i + j) ) { - retNode = NodeManager::currentNM()->mkConst( node[0].getConst().substr(i, j) ); + } else if( node[1].isConst() && node[2].isConst() ) { + if(node[1].getConst().sgn()>=0 && node[2].getConst().sgn()>=0) { + int i = node[1].getConst().getNumerator().toUnsignedInt(); + int j = node[2].getConst().getNumerator().toUnsignedInt(); + if( node[0].isConst() ) { + if( node[0].getConst().size() >= (unsigned) (i + j) ) { + retNode = NodeManager::currentNM()->mkConst( node[0].getConst().substr(i, j) ); + } else { + retNode = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); + } + } else if(node[0].getKind() == kind::STRING_CONCAT && node[0][0].isConst()) { + if( node[0][0].getConst().size() >= (unsigned) (i + j) ) { + retNode = NodeManager::currentNM()->mkConst( node[0][0].getConst().substr(i, j) ); + } + } } else { retNode = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); } @@ -492,7 +489,12 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { CVC4::String s = node[0].getConst(); if(s.isNumber()) { std::string stmp = s.toString(); - retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(stmp.c_str())); + if(stmp[0] == '0' && stmp.size() != 1) { + //TODO: leading zeros + retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(-1)); + } else { + retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(stmp.c_str())); + } } else { retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(-1)); } @@ -524,13 +526,13 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { retNode = rewriteConcatString(node); } else if(node.getKind() == kind::REGEXP_CONCAT) { retNode = prerewriteConcatRegExp(node); - } else if(node.getKind() == kind::REGEXP_OR) { + } else if(node.getKind() == kind::REGEXP_UNION) { retNode = prerewriteOrRegExp(node); } else if(node.getKind() == kind::REGEXP_PLUS) { retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, node[0], NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, node[0])); } else if(node.getKind() == kind::REGEXP_OPT) { - retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String("") ) ), node[0]); } else if(node.getKind() == kind::REGEXP_RANGE) { @@ -544,7 +546,7 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { if(vec_nodes.size() == 1) { retNode = vec_nodes[0]; } else { - retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ); + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ); } } diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h index 4ac4c26b4..7eb4ac3d0 100644 --- a/src/theory/strings/theory_strings_type_rules.h +++ b/src/theory/strings/theory_strings_type_rules.h @@ -261,7 +261,7 @@ public: } }; -class RegExpOrTypeRule { +class RegExpUnionTypeRule { public: inline static TypeNode computeType(NodeManager* nodeManager, TNode n, bool check) throw (TypeCheckingExceptionPrivate, AssertionException) { diff --git a/test/regress/regress0/strings/Makefile.am b/test/regress/regress0/strings/Makefile.am index f11ada1a1..705a7eadb 100644 --- a/test/regress/regress0/strings/Makefile.am +++ b/test/regress/regress0/strings/Makefile.am @@ -37,7 +37,6 @@ TESTS = \ model001.smt2 \ substr001.smt2 \ regexp001.smt2 \ - regexp002.smt2 \ loop001.smt2 \ loop002.smt2 \ loop003.smt2 \ @@ -48,6 +47,8 @@ TESTS = \ loop008.smt2 \ loop009.smt2 +#regexp002.smt2 + FAILING_TESTS = EXTRA_DIST = $(TESTS) diff --git a/test/regress/regress0/strings/fmf001.smt2 b/test/regress/regress0/strings/fmf001.smt2 index f5ed1c3fb..05bbab586 100644 --- a/test/regress/regress0/strings/fmf001.smt2 +++ b/test/regress/regress0/strings/fmf001.smt2 @@ -1,4 +1,5 @@ (set-logic QF_S) +(set-option :strings-exp true) (set-option :strings-fmf true) (set-info :status sat) diff --git a/test/regress/regress0/strings/fmf002.smt2 b/test/regress/regress0/strings/fmf002.smt2 index 525fc152c..1d41b1085 100644 --- a/test/regress/regress0/strings/fmf002.smt2 +++ b/test/regress/regress0/strings/fmf002.smt2 @@ -1,4 +1,5 @@ (set-logic QF_S) +(set-option :strings-exp true) (set-option :strings-fmf true) (set-info :status sat) diff --git a/test/regress/regress0/strings/loop007.smt2 b/test/regress/regress0/strings/loop007.smt2 index b292de512..a97d97d54 100644 --- a/test/regress/regress0/strings/loop007.smt2 +++ b/test/regress/regress0/strings/loop007.smt2 @@ -1,4 +1,5 @@ (set-logic QF_S) +(set-option :strings-exp true) (set-info :status sat) (declare-fun x () String) diff --git a/test/regress/regress0/strings/loop008.smt2 b/test/regress/regress0/strings/loop008.smt2 index 91ed8cdf0..113577e48 100644 --- a/test/regress/regress0/strings/loop008.smt2 +++ b/test/regress/regress0/strings/loop008.smt2 @@ -1,4 +1,5 @@ (set-logic QF_S) +(set-option :strings-exp true) (set-info :status sat) (declare-fun x () String) diff --git a/test/regress/regress0/strings/loop009.smt2 b/test/regress/regress0/strings/loop009.smt2 index 41eab0f35..9ccc6de6e 100644 --- a/test/regress/regress0/strings/loop009.smt2 +++ b/test/regress/regress0/strings/loop009.smt2 @@ -1,4 +1,5 @@ (set-logic QF_S) +(set-option :strings-exp true) (set-info :status sat) (declare-fun x () String) diff --git a/test/regress/regress0/strings/regexp001.smt2 b/test/regress/regress0/strings/regexp001.smt2 index 41aefbd41..6a2044ea8 100644 --- a/test/regress/regress0/strings/regexp001.smt2 +++ b/test/regress/regress0/strings/regexp001.smt2 @@ -1,5 +1,6 @@ (set-logic QF_S) (set-info :status sat) +(set-option :strings-exp true) (declare-fun x () String) diff --git a/test/regress/regress0/strings/regexp002.smt2 b/test/regress/regress0/strings/regexp002.smt2 index e2a44a9ab..18541af4e 100644 --- a/test/regress/regress0/strings/regexp002.smt2 +++ b/test/regress/regress0/strings/regexp002.smt2 @@ -1,5 +1,6 @@ (set-logic QF_S) (set-info :status sat) +(set-option :strings-exp true) (declare-fun x () String) (declare-fun y () String) -- 2.30.2