From 698f5a09b1c0177abfd2eaa2b110de100fd108ef Mon Sep 17 00:00:00 2001 From: Tianyi Liang Date: Thu, 24 Apr 2014 17:30:15 -0500 Subject: [PATCH] minor change: add a heuristic for preventing constant splitting. --- src/theory/strings/theory_strings.cpp | 59 +++++++++++++------- src/theory/strings/theory_strings.h | 5 +- src/util/regexp.cpp | 80 +++++++++++++++++++++++++++ src/util/regexp.h | 74 ++++--------------------- 4 files changed, 133 insertions(+), 85 deletions(-) diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index f2172071d..d03faa72a 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -65,7 +65,7 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu d_curr_cardinality(c, 0) { // The kinds we are treating as function application in congruence - //d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP); + d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP); d_equalityEngine.addFunctionKind(kind::STRING_LENGTH); d_equalityEngine.addFunctionKind(kind::STRING_CONCAT); d_equalityEngine.addFunctionKind(kind::STRING_STRCTN); @@ -418,7 +418,7 @@ void TheoryStrings::preRegisterTerm(TNode n) { break; case kind::STRING_IN_REGEXP: //do not add trigger here - //d_equalityEngine.addTriggerPredicate(n); + d_equalityEngine.addTriggerPredicate(n); break; case kind::STRING_SUBSTR_TOTAL: { Node lenxgti = NodeManager::currentNM()->mkNode( kind::GEQ, @@ -560,7 +560,7 @@ void TheoryStrings::check(Effort e) { //must record string in regular expressions if ( atom.getKind() == kind::STRING_IN_REGEXP ) { addMembership(assertion); - //d_equalityEngine.assertPredicate(atom, polarity, fact); + d_equalityEngine.assertPredicate(atom, polarity, fact); } else if (atom.getKind() == kind::STRING_STRCTN) { if(polarity) { d_str_pos_ctn.push_back( atom ); @@ -1232,18 +1232,37 @@ bool TheoryStrings::processNEqc(std::vector< std::vector< Node > > &normal_forms Assert( other_str.getKind()!=kind::CONST_STRING, "Other string is not constant." ); Assert( other_str.getKind()!=kind::STRING_CONCAT, "Other string is not CONCAT." ); antec.insert(antec.end(), curr_exp.begin(), curr_exp.end() ); - Node firstChar = const_str.getConst().size() == 1 ? const_str : - NodeManager::currentNM()->mkConst( const_str.getConst().substr(0, 1) ); - //split the string - Node eq1 = Rewriter::rewrite( other_str.eqNode( d_emptyString ) ); - Node eq2 = mkSplitEq( "c_spt_$$", "created for v/c split", other_str, firstChar, false ); - d_pending_req_phase[ eq1 ] = true; - conc = NodeManager::currentNM()->mkNode( kind::OR, eq1, eq2 ); - Trace("strings-solve-debug") << "Break normal form constant/variable " << std::endl; - - Node ant = mkExplain( antec ); - sendLemma( ant, conc, "CST-SPLIT" ); - ++(d_statistics.d_eq_splits); + //Opt + bool optflag = false; + if( normal_forms[nconst_k].size() > nconst_index_k + 1 && + normal_forms[nconst_k][nconst_index_k + 1].isConst() ) { + CVC4::String stra = const_str.getConst(); + CVC4::String strb = normal_forms[nconst_k][nconst_index_k + 1].getConst(); + CVC4::String fc = strb.substr(0, 1); + if( stra.find(fc) == std::string::npos || + (stra.find(strb) == std::string::npos && + !stra.overlap(strb)) ) { + Node sk = NodeManager::currentNM()->mkSkolem( "sopt_$$", NodeManager::currentNM()->stringType(), "created for string sp" ); + Node eq = other_str.eqNode( mkConcat(const_str, sk) ); + Node ant = mkExplain( antec ); + sendLemma(ant, eq, "CST-EPS"); + optflag = true; + } + } + if(!optflag){ + Node firstChar = const_str.getConst().size() == 1 ? const_str : + NodeManager::currentNM()->mkConst( const_str.getConst().substr(0, 1) ); + //split the string + Node eq1 = Rewriter::rewrite( other_str.eqNode( d_emptyString ) ); + Node eq2 = mkSplitEq( "c_spt_$$", "created for v/c split", other_str, firstChar, false ); + d_pending_req_phase[ eq1 ] = true; + conc = NodeManager::currentNM()->mkNode( kind::OR, eq1, eq2 ); + Trace("strings-solve-debug") << "Break normal form constant/variable " << std::endl; + + Node ant = mkExplain( antec ); + sendLemma( ant, conc, "CST-SPLIT" ); + ++(d_statistics.d_eq_splits); + } return true; } else { std::vector< Node > antec_new_lits; @@ -1785,10 +1804,7 @@ void TheoryStrings::sendSplit( Node a, Node b, const char * c, bool preq ) { } Node TheoryStrings::mkConcat( Node n1, Node n2 ) { - std::vector< Node > c; - c.push_back( n1 ); - c.push_back( n2 ); - return mkConcat( c ); + return NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, n1, n2 ); } Node TheoryStrings::mkConcat( std::vector< Node >& c ) { @@ -2888,6 +2904,11 @@ void TheoryStrings::addMembership(Node assertion) { d_regexp_memberships.push_back( assertion ); } +Node TheoryStrings::instantiateSymRegExp(Node r) { + //TODO: + return r; +} + //// Finite Model Finding Node TheoryStrings::getNextDecisionRequest() { diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 9f99012df..33283d1cf 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -272,8 +272,8 @@ protected: void sendInfer( Node eq_exp, Node eq, const char * c ); void sendSplit( Node a, Node b, const char * c, bool preq = true ); /** mkConcat **/ - Node mkConcat( Node n1, Node n2 ); - Node mkConcat( std::vector< Node >& c ); + inline Node mkConcat( Node n1, Node n2 ); + inline Node mkConcat( std::vector< Node >& c ); /** mkExplain **/ Node mkExplain( std::vector< Node >& a ); Node mkExplain( std::vector< Node >& a, std::vector< Node >& an ); @@ -323,6 +323,7 @@ private: bool splitRegExp( Node x, Node r, Node ant ); bool addMembershipLength(Node atom); void addMembership(Node assertion); + Node instantiateSymRegExp(Node r); // Finite Model Finding diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp index b6db624d5..3bc17b050 100644 --- a/src/util/regexp.cpp +++ b/src/util/regexp.cpp @@ -23,6 +23,71 @@ using namespace std; namespace CVC4 { +void String::toInternal(const std::string &s) { + d_str.clear(); + unsigned i=0; + while(i < s.size()) { + if(s[i] == '\\') { + i++; + if(i < s.size()) { + switch(s[i]) { + case 'n': {d_str.push_back( convertCharToUnsignedInt('\n') );i++;} break; + case 't': {d_str.push_back( convertCharToUnsignedInt('\t') );i++;} break; + case 'v': {d_str.push_back( convertCharToUnsignedInt('\v') );i++;} break; + case 'b': {d_str.push_back( convertCharToUnsignedInt('\b') );i++;} break; + case 'r': {d_str.push_back( convertCharToUnsignedInt('\r') );i++;} break; + case 'f': {d_str.push_back( convertCharToUnsignedInt('\f') );i++;} break; + case 'a': {d_str.push_back( convertCharToUnsignedInt('\a') );i++;} break; + case '\\': {d_str.push_back( convertCharToUnsignedInt('\\') );i++;} break; + case 'x': { + if(i + 2 < s.size()) { + if((isdigit(s[i+1]) || (s[i+1] >= 'a' && s[i+1] >= 'f') || (s[i+1] >= 'A' && s[i+1] >= 'F')) && + (isdigit(s[i+2]) || (s[i+2] >= 'a' && s[i+2] >= 'f') || (s[i+2] >= 'A' && s[i+2] >= 'F'))) { + d_str.push_back( convertCharToUnsignedInt( hexToDec(s[i+1]) * 16 + hexToDec(s[i+2]) ) ); + i += 3; + } else { + throw CVC4::Exception( "Error String Literal: \"" + s + "\"" ); + } + } else { + throw CVC4::Exception( "Error String Literal: \"" + s + "\"" ); + } + } + break; + default: { + if(isdigit(s[i])) { + int num = (int)s[i] - (int)'0'; + bool flag = num < 4; + if(i+1 < s.size() && num < 8 && isdigit(s[i+1]) && s[i+1] < '8') { + num = num * 8 + (int)s[i+1] - (int)'0'; + if(flag && i+2 < s.size() && isdigit(s[i+2]) && s[i+2] < '8') { + num = num * 8 + (int)s[i+2] - (int)'0'; + d_str.push_back( convertCharToUnsignedInt((char)num) ); + i += 3; + } else { + d_str.push_back( convertCharToUnsignedInt((char)num) ); + i += 2; + } + } else { + d_str.push_back( convertCharToUnsignedInt((char)num) ); + i++; + } + } else { + d_str.push_back( convertCharToUnsignedInt(s[i]) ); + i++; + } + } + } + } else { + throw CVC4::Exception( "should be handled by lexer: \"" + s + "\"" ); + //d_str.push_back( convertCharToUnsignedInt('\\') ); + } + } else { + d_str.push_back( convertCharToUnsignedInt(s[i]) ); + i++; + } + } +} + void String::getCharSet(std::set &cset) const { for(std::vector::const_iterator itr = d_str.begin(); itr != d_str.end(); itr++) { @@ -30,6 +95,21 @@ void String::getCharSet(std::set &cset) const { } } +bool String::overlap(String &y) const { + unsigned n = y.size(); + if(d_str.size() < y.size()) { + n = d_str.size(); + } + for(unsigned i=1; i= 'a' && s[i+1] >= 'f') || (s[i+1] >= 'A' && s[i+1] >= 'F')) && - (isdigit(s[i+2]) || (s[i+2] >= 'a' && s[i+2] >= 'f') || (s[i+2] >= 'A' && s[i+2] >= 'F'))) { - d_str.push_back( convertCharToUnsignedInt( hexToDec(s[i+1]) * 16 + hexToDec(s[i+2]) ) ); - i += 3; - } else { - throw CVC4::Exception( "Error String Literal: \"" + s + "\"" ); - } - } else { - throw CVC4::Exception( "Error String Literal: \"" + s + "\"" ); - } - } - break; - default: { - if(isdigit(s[i])) { - int num = (int)s[i] - (int)'0'; - bool flag = num < 4; - if(i+1 < s.size() && num < 8 && isdigit(s[i+1]) && s[i+1] < '8') { - num = num * 8 + (int)s[i+1] - (int)'0'; - if(flag && i+2 < s.size() && isdigit(s[i+2]) && s[i+2] < '8') { - num = num * 8 + (int)s[i+2] - (int)'0'; - d_str.push_back( convertCharToUnsignedInt((char)num) ); - i += 3; - } else { - d_str.push_back( convertCharToUnsignedInt((char)num) ); - i += 2; - } - } else { - d_str.push_back( convertCharToUnsignedInt((char)num) ); - i++; - } - } else { - d_str.push_back( convertCharToUnsignedInt(s[i]) ); - i++; - } - } - } - } else { - throw CVC4::Exception( "should be handled by lexer: \"" + s + "\"" ); - //d_str.push_back( convertCharToUnsignedInt('\\') ); - } - } else { - d_str.push_back( convertCharToUnsignedInt(s[i]) ); - i++; - } - } - } + void toInternal(const std::string &s); public: String() {} @@ -316,6 +253,15 @@ public: ret_vec.insert( ret_vec.end(), itr, itr + j ); return String(ret_vec); } + + String prefix(unsigned i) const { + return substr(0, i); + } + String suffix(unsigned i) const { + return substr(d_str.size() - i, i); + } + bool overlap(String &y) const; + bool isNumber() const { if(d_str.size() == 0) return false; for(unsigned int i=0; i