From db43ae511c2103f1e9718a8954e26cf7866d14a8 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Mon, 7 May 2018 10:22:27 -0500 Subject: [PATCH] Add support for str.code (#1821) --- src/parser/smt2/smt2.cpp | 1 + src/printer/smt2/smt2_printer.cpp | 1 + src/theory/strings/kinds | 2 + src/theory/strings/theory_strings.cpp | 374 +++++++++++++----- src/theory/strings/theory_strings.h | 38 +- .../strings/theory_strings_rewriter.cpp | 28 ++ src/theory/strings/theory_strings_rewriter.h | 6 + .../strings/theory_strings_type_rules.h | 4 +- src/util/regexp.cpp | 26 ++ src/util/regexp.h | 48 ++- test/regress/Makefile.tests | 5 + .../regress1/strings/code-sequence.smt2 | 14 + .../regress1/strings/str-code-sat.smt2 | 24 ++ .../regress1/strings/str-code-unsat-2.smt2 | 6 + .../regress1/strings/str-code-unsat-3.smt2 | 21 + .../regress1/strings/str-code-unsat.smt2 | 21 + 16 files changed, 507 insertions(+), 112 deletions(-) create mode 100644 test/regress/regress1/strings/code-sequence.smt2 create mode 100644 test/regress/regress1/strings/str-code-sat.smt2 create mode 100644 test/regress/regress1/strings/str-code-unsat-2.smt2 create mode 100644 test/regress/regress1/strings/str-code-unsat-3.smt2 create mode 100644 test/regress/regress1/strings/str-code-unsat.smt2 diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp index 1d66ab0c1..09dccdfbd 100644 --- a/src/parser/smt2/smt2.cpp +++ b/src/parser/smt2/smt2.cpp @@ -153,6 +153,7 @@ void Smt2::addStringOperators() { addOperator(kind::REGEXP_OPT, "re.opt"); addOperator(kind::REGEXP_RANGE, "re.range"); addOperator(kind::REGEXP_LOOP, "re.loop"); + addOperator(kind::STRING_CODE, "str.code"); } void Smt2::addFloatingPointOperators() { diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp index 96bee9724..2c6a26335 100644 --- a/src/printer/smt2/smt2_printer.cpp +++ b/src/printer/smt2/smt2_printer.cpp @@ -523,6 +523,7 @@ void Smt2Printer::toStream(std::ostream& out, case kind::STRING_SUFFIX: case kind::STRING_ITOS: case kind::STRING_STOI: + case kind::STRING_CODE: case kind::STRING_TO_REGEXP: case kind::REGEXP_CONCAT: case kind::REGEXP_UNION: diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 15dd5b423..34237f69e 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -24,6 +24,7 @@ operator STRING_PREFIX 2 "string prefixof" operator STRING_SUFFIX 2 "string suffixof" operator STRING_ITOS 1 "integer to string" operator STRING_STOI 1 "string to integer (total function)" +operator STRING_CODE 1 "string to code, returns the code of the first character of the string if it has length one, -1 otherwise" #sort CHAR_TYPE \ # Cardinality::INTEGERS \ @@ -103,6 +104,7 @@ typerule STRING_PREFIX ::CVC4::theory::strings::StringPrefixOfTypeRule typerule STRING_SUFFIX ::CVC4::theory::strings::StringSuffixOfTypeRule typerule STRING_ITOS ::CVC4::theory::strings::StringIntToStrTypeRule typerule STRING_STOI ::CVC4::theory::strings::StringStrToIntTypeRule +typerule STRING_CODE ::CVC4::theory::strings::StringStrToIntTypeRule typerule STRING_IN_REGEXP ::CVC4::theory::strings::StringInRegExpTypeRule diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index c780caca2..b77a616b3 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -32,6 +32,7 @@ using namespace std; using namespace CVC4::context; +using namespace CVC4::kind; namespace CVC4 { namespace theory { @@ -73,13 +74,14 @@ Node TheoryStrings::TermIndex::add( TNode n, unsigned index, TheoryStrings* t, N } } - -TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, - OutputChannel& out, Valuation valuation, +TheoryStrings::TheoryStrings(context::Context* c, + context::UserContext* u, + OutputChannel& out, + Valuation valuation, const LogicInfo& logicInfo) : Theory(THEORY_STRINGS, c, u, out, valuation, logicInfo), RMAXINT(LONG_MAX), - d_notify( *this ), + d_notify(*this), d_equalityEngine(d_notify, c, "theory::strings", true), d_conflict(c, false), d_infer(c), @@ -98,7 +100,8 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, d_proxy_var(u), d_proxy_var_to_length(u), d_functionsTerms(c), - d_has_extf(c, false ), + d_has_extf(c, false), + d_has_str_code(false), d_regexp_memberships(c), d_regexp_ucached(u), d_regexp_ccached(c), @@ -121,11 +124,13 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, getExtTheory()->addFunctionKind(kind::STRING_STRREPL); getExtTheory()->addFunctionKind(kind::STRING_STRCTN); getExtTheory()->addFunctionKind(kind::STRING_IN_REGEXP); + getExtTheory()->addFunctionKind(kind::STRING_CODE); // The kinds we are treating as function application in congruence - d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP); d_equalityEngine.addFunctionKind(kind::STRING_LENGTH); d_equalityEngine.addFunctionKind(kind::STRING_CONCAT); + d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP); + d_equalityEngine.addFunctionKind(kind::STRING_CODE); if( options::stringLazyPreproc() ){ d_equalityEngine.addFunctionKind(kind::STRING_STRCTN); d_equalityEngine.addFunctionKind(kind::STRING_SUBSTR); @@ -410,7 +415,9 @@ int TheoryStrings::getReduction( int effort, Node n, Node& nr ) { if( d_preproc_cache.find( c_n )==d_preproc_cache.end() ){ d_preproc_cache[ c_n ] = true; Trace("strings-process-debug") << "Process reduction for " << n << ", pol = " << pol << std::endl; - if( n.getKind()==kind::STRING_STRCTN && pol==1 ){ + Kind k = n.getKind(); + if (k == kind::STRING_STRCTN && pol == 1) + { Node x = n[0]; Node s = n[1]; //positive contains reduces to a equality @@ -423,9 +430,13 @@ int TheoryStrings::getReduction( int effort, Node n, Node& nr ) { //we've reduced this n Trace("strings-extf-debug") << " resolve extf : " << n << " based on positive contain reduction." << std::endl; return 1; - }else{ - // for STRING_SUBSTR, STRING_STRCTN with pol=-1, - // STRING_STRIDOF, STRING_ITOS, STRING_STOI, STRING_STRREPL + } + else if (k != kind::STRING_CODE) + { + Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF + || k == STRING_ITOS + || k == STRING_STOI + || k == STRING_STRREPL); std::vector< Node > new_nodes; Node res = d_preproc.simplify( n, new_nodes ); Assert( res!=n ); @@ -478,6 +489,7 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m) return false; } + NodeManager* nm = NodeManager::currentNM(); // Generate model std::vector< Node > nodes; getEquivalenceClasses( nodes ); @@ -521,20 +533,45 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m) ////step 2 : assign arbitrary values for unknown lengths? // confirmed by calculus invariant, see paper Trace("strings-model") << "Assign to equivalence classes..." << std::endl; + std::map pure_eq_assign; //step 3 : assign values to equivalence classes that are pure variables for( unsigned i=0; i pure_eq; Trace("strings-model") << "The equivalence classes "; - for( unsigned j=0; jd_code_term.get().isNull()) + { + // its value must be equal to its code + Node ct = nm->mkNode(kind::STRING_CODE, eip->d_code_term.get()); + Node ctv = d_valuation.getModelValue(ct); + unsigned cvalue = + ctv.getConst().getNumerator().toUnsignedInt(); + Trace("strings-model") << "(code: " << cvalue << ") "; + std::vector vec; + vec.push_back(String::convertCodeToUnsignedInt(cvalue)); + Node mv = nm->mkConst(String(vec)); + pure_eq_assign[eqc] = mv; + m->getEqualityEngine()->addTerm( mv ); + } + } + pure_eq.push_back(eqc); } - }else{ - processed[col[i][j]] = col[i][j]; + } + else + { + processed[eqc] = eqc; } } Trace("strings-model") << "have length " << lts_values[i] << std::endl; @@ -547,7 +584,7 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m) lvalue++; } Trace("strings-model") << "*** Decide to make length of " << lvalue << std::endl; - lts_values[i] = NodeManager::currentNM()->mkConst( Rational( lvalue ) ); + lts_values[i] = nm->mkConst(Rational(lvalue)); values_used[ lvalue ] = true; } Trace("strings-model") << "Need to assign values of length " << lts_values[i] << " to equivalence classes "; @@ -556,22 +593,33 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m) } Trace("strings-model") << std::endl; - //use type enumerator Assert(lts_values[i].getConst() <= RMAXINT, "Exceeded LONG_MAX in string model"); StringEnumeratorLength sel(lts_values[i].getConst().getNumerator().toUnsignedInt()); - for( unsigned j=0; j::iterator itp = pure_eq_assign.find(eqc); + if (itp == pure_eq_assign.end()) + { Assert( !sel.isFinished() ); c = *sel; + while (m->hasTerm(c)) + { + ++sel; + Assert(!sel.isFinished()); + c = *sel; + } + ++sel; + } + else + { + c = itp->second; } - ++sel; - Trace("strings-model") << "*** Assigned constant " << c << " for " << pure_eq[j] << std::endl; - processed[pure_eq[j]] = c; - if (!m->assertEquality(pure_eq[j], c, true)) + Trace("strings-model") << "*** Assigned constant " << c << " for " + << eqc << std::endl; + processed[eqc] = c; + if (!m->assertEquality(eqc, c, true)) { return false; } @@ -645,9 +693,9 @@ void TheoryStrings::preRegisterTerm(TNode n) { break; } default: { + registerTerm(n, 0); TypeNode tn = n.getType(); if( tn.isString() ) { - registerTerm( n, 0 ); // if finite model finding is enabled, // then we minimize the length of this term if it is a variable // but not an internally generated Skolem, or a term that does @@ -840,8 +888,12 @@ void TheoryStrings::checkExtfReductions( int effort ) { } } -TheoryStrings::EqcInfo::EqcInfo( context::Context* c ) : d_length_term(c), d_cardinality_lem_k(c), d_normalized_length(c) { - +TheoryStrings::EqcInfo::EqcInfo(context::Context* c) + : d_length_term(c), + d_code_term(c), + d_cardinality_lem_k(c), + d_normalized_length(c) +{ } TheoryStrings::EqcInfo * TheoryStrings::getOrMakeEqcInfo( Node eqc, bool doMake ) { @@ -874,11 +926,20 @@ void TheoryStrings::conflict(TNode a, TNode b){ /** called when a new equivalance class is created */ void TheoryStrings::eqNotifyNewClass(TNode t){ - if( t.getKind() == kind::STRING_LENGTH ){ + Kind k = t.getKind(); + if (k == kind::STRING_LENGTH || k == kind::STRING_CODE) + { Trace("strings-debug") << "New length eqc : " << t << std::endl; Node r = d_equalityEngine.getRepresentative(t[0]); EqcInfo * ei = getOrMakeEqcInfo( r, true ); - ei->d_length_term = t[0]; + if (k == kind::STRING_LENGTH) + { + ei->d_length_term = t[0]; + } + else + { + ei->d_code_term = t[0]; + } //we care about the length of this string registerTerm( t[0], 1 ); }else{ @@ -895,6 +956,10 @@ void TheoryStrings::eqNotifyPreMerge(TNode t1, TNode t2){ if( !e2->d_length_term.get().isNull() ){ e1->d_length_term.set( e2->d_length_term ); } + if (!e2->d_code_term.get().isNull()) + { + e1->d_code_term.set(e2->d_code_term); + } if( e2->d_cardinality_lem_k.get()>e1->d_cardinality_lem_k.get() ) { e1->d_cardinality_lem_k.set( e2->d_cardinality_lem_k ); } @@ -2013,64 +2078,164 @@ void TheoryStrings::checkNormalForms(){ } } } - if( !hasProcessed() ){ - Trace("strings-process") << "Normalize equivalence classes...." << std::endl; - //calculate normal forms for each equivalence class, possibly adding splitting lemmas - d_normal_forms.clear(); - d_normal_forms_exp.clear(); - std::map< Node, Node > nf_to_eqc; - std::map< Node, Node > eqc_to_nf; - std::map< Node, Node > eqc_to_exp; - for( unsigned i=0; i nf_to_eqc; + std::map eqc_to_nf; + std::map eqc_to_exp; + for (const Node& eqc : d_strings_eqc) + { + Trace("strings-process-debug") << "- Verify normal forms are the same for " + << eqc << std::endl; + normalizeEquivalenceClass(eqc); + Trace("strings-debug") << "Finished normalizing eqc..." << std::endl; + if (hasProcessed()) + { + return; + } + Node nf_term = mkConcat(d_normal_forms[eqc]); + std::map::iterator itn = nf_to_eqc.find(nf_term); + if (itn != nf_to_eqc.end()) + { + // two equivalence classes have same normal form, merge + std::vector nf_exp; + nf_exp.push_back(mkAnd(d_normal_forms_exp[eqc])); + nf_exp.push_back(eqc_to_exp[itn->second]); + Node eq = + d_normal_forms_base[eqc].eqNode(d_normal_forms_base[itn->second]); + sendInference(nf_exp, eq, "Normal_Form"); if( hasProcessed() ){ return; - }else{ - Node nf_term = mkConcat( d_normal_forms[eqc] ); - std::map< Node, Node >::iterator itn = nf_to_eqc.find( nf_term ); - if( itn!=nf_to_eqc.end() ){ - //two equivalence classes have same normal form, merge - std::vector< Node > nf_exp; - nf_exp.push_back( mkAnd( d_normal_forms_exp[eqc] ) ); - nf_exp.push_back( eqc_to_exp[itn->second] ); - Node eq = d_normal_forms_base[eqc].eqNode( d_normal_forms_base[itn->second] ); - sendInference( nf_exp, eq, "Normal_Form" ); - } else { - nf_to_eqc[nf_term] = eqc; - eqc_to_nf[eqc] = nf_term; - eqc_to_exp[eqc] = mkAnd( d_normal_forms_exp[eqc] ); - } } - Trace("strings-process-debug") << "Done verifying normal forms are the same for " << eqc << std::endl; } - if( !hasProcessed() ){ - if(Trace.isOn("strings-nf")) { - Trace("strings-nf") << "**** Normal forms are : " << std::endl; - for( std::map< Node, Node >::iterator it = eqc_to_exp.begin(); it != eqc_to_exp.end(); ++it ){ - Trace("strings-nf") << " N[" << it->first << "] (base " << d_normal_forms_base[it->first] << ") = " << eqc_to_nf[it->first] << std::endl; - Trace("strings-nf") << " exp: " << it->second << std::endl; + else + { + nf_to_eqc[nf_term] = eqc; + eqc_to_nf[eqc] = nf_term; + eqc_to_exp[eqc] = mkAnd(d_normal_forms_exp[eqc]); + } + Trace("strings-process-debug") + << "Done verifying normal forms are the same for " << eqc << std::endl; + } + if (Trace.isOn("strings-nf")) + { + Trace("strings-nf") << "**** Normal forms are : " << std::endl; + for (std::map::iterator it = eqc_to_exp.begin(); + it != eqc_to_exp.end(); + ++it) + { + Trace("strings-nf") << " N[" << it->first << "] (base " + << d_normal_forms_base[it->first] + << ") = " << eqc_to_nf[it->first] << std::endl; + Trace("strings-nf") << " exp: " << it->second << std::endl; + } + Trace("strings-nf") << std::endl; + } + checkExtfEval(1); + Trace("strings-process-debug") + << "Done check extended functions re-eval, addedFact = " + << !d_pending.empty() << " " << !d_lemma_cache.empty() + << ", d_conflict = " << d_conflict << std::endl; + if (hasProcessed()) + { + return; + } + if (!options::stringEagerLen()) + { + checkLengthsEqc(); + if (hasProcessed()) + { + return; + } + } + // process disequalities between equivalence classes + checkDeqNF(); + Trace("strings-process-debug") + << "Done check disequalities, addedFact = " << !d_pending.empty() << " " + << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; + if (hasProcessed()) + { + return; + } + // ensure that lemmas regarding str.code been added for each constant string + // of length one + if (d_has_str_code) + { + NodeManager* nm = NodeManager::currentNM(); + // str.code applied to the code term for each equivalence class that has a + // code term but is not a constant + std::vector nconst_codes; + // str.code applied to the proxy variables for each equivalence classes that + // are constants of size one + std::vector const_codes; + for (const Node& eqc : d_strings_eqc) + { + if (d_normal_forms[eqc].size() == 1 && d_normal_forms[eqc][0].isConst()) + { + Node c = d_normal_forms[eqc][0]; + Trace("strings-code-debug") << "Get proxy variable for " << c + << std::endl; + Node cc = nm->mkNode(kind::STRING_CODE, c); + cc = Rewriter::rewrite(cc); + Assert(cc.isConst()); + NodeNodeMap::const_iterator it = d_proxy_var.find(c); + AlwaysAssert(it != d_proxy_var.end()); + Node vc = nm->mkNode(kind::STRING_CODE, (*it).second); + if (!areEqual(cc, vc)) + { + sendInference(d_empty_vec, cc.eqNode(vc), "Code_Proxy"); } - Trace("strings-nf") << std::endl; + const_codes.push_back(vc); } - checkExtfEval( 1 ); - Trace("strings-process-debug") << "Done check extended functions re-eval, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - if( !hasProcessed() ){ - if( !options::stringEagerLen() ){ - checkLengthsEqc(); - if( hasProcessed() ){ - return; - } + else + { + EqcInfo* ei = getOrMakeEqcInfo(eqc, false); + if (ei && !ei->d_code_term.get().isNull()) + { + Node vc = nm->mkNode(kind::STRING_CODE, ei->d_code_term.get()); + nconst_codes.push_back(vc); + } + } + } + if (hasProcessed()) + { + return; + } + // now, ensure that str.code is injective + std::vector cmps; + cmps.insert(cmps.end(), const_codes.rbegin(), const_codes.rend()); + cmps.insert(cmps.end(), nconst_codes.rbegin(), nconst_codes.rend()); + for (unsigned i = 0, num_ncc = nconst_codes.size(); i < num_ncc; i++) + { + Node c1 = nconst_codes[i]; + cmps.pop_back(); + for (const Node& c2 : cmps) + { + Trace("strings-code-debug") << "Compare codes : " << c1 << " " << c2 + << std::endl; + if (!areDisequal(c1, c2)) + { + Node eqn = c1[0].eqNode(c2[0]); + Node eq = c1.eqNode(c2); + Node inj_lem = nm->mkNode(kind::OR, eq.negate(), eqn); + sendInference(d_empty_vec, inj_lem, "Code_Inj"); } - //process disequalities between equivalence classes - checkDeqNF(); - Trace("strings-process-debug") << "Done check disequalities, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; } } - Trace("strings-solve") << "Finished check normal forms, #lemmas = " << d_lemma_cache.size() << ", conflict = " << d_conflict << std::endl; } + Trace("strings-process-debug") + << "Done check code, addedFact = " << !d_pending.empty() << " " + << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; + Trace("strings-solve") << "Finished check normal forms, #lemmas = " + << d_lemma_cache.size() + << ", conflict = " << d_conflict << std::endl; } //compute d_normal_forms_(base,exp,exp_depend)[eqc] @@ -3278,21 +3443,25 @@ bool TheoryStrings::isNormalFormPair2( Node n1, Node n2 ) { } void TheoryStrings::registerTerm( Node n, int effort ) { - // 0 : upon preregistration or internal assertion - // 1 : upon occurrence in length term - // 2 : before normal form computation - // 3 : called on normal form terms - bool do_register = false; - if( options::stringEagerLen() ){ - do_register = effort==0; - }else{ - do_register = effort>0 || n.getKind()!=kind::STRING_CONCAT; + TypeNode tn = n.getType(); + bool do_register = true; + if (!tn.isString()) + { + if (options::stringEagerLen()) + { + do_register = effort == 0; + } + else + { + do_register = effort > 0 || n.getKind() != kind::STRING_CONCAT; + } } if( do_register ){ if(d_registered_terms_cache.find(n) == d_registered_terms_cache.end()) { d_registered_terms_cache.insert(n); Debug("strings-register") << "TheoryStrings::registerTerm() " << n << ", effort = " << effort << std::endl; - if(n.getType().isString()) { + if (tn.isString()) + { //register length information: // for variables, split on empty vs positive length // for concat/const/replace, introduce proxy var and state length relation @@ -3344,10 +3513,25 @@ void TheoryStrings::registerTerm( Node n, int effort ) { Trace("strings-lemma-debug") << " prerewrite : " << skl.eqNode( lsum ) << std::endl; Trace("strings-assert") << "(assert " << ceq << ")" << std::endl; d_out->lemma(ceq); - } - } else { - AlwaysAssert(false, "String Terms only in registerTerm."); + } + else if (n.getKind() == kind::STRING_CODE) + { + d_has_str_code = true; + NodeManager* nm = NodeManager::currentNM(); + // ite( str.len(s)==1, 0 <= str.code(s) < num_codes, str.code(s)=-1 ) + Node neg_one = nm->mkConst(Rational(-1)); + Node code_len = mkLength(n[0]).eqNode(d_one); + Node code_eq_neg1 = n.eqNode(nm->mkConst(Rational(-1))); + Node code_range = nm->mkNode( + kind::AND, + nm->mkNode(kind::GEQ, n, d_zero), + nm->mkNode( + kind::LT, n, nm->mkConst(Rational(CVC4::String::num_codes())))); + Node lem = nm->mkNode(kind::ITE, code_len, code_range, code_eq_neg1); + Trace("strings-lemma") << "Strings::Lemma CODE : " << lem << std::endl; + Trace("strings-assert") << "(assert " << lem << ")" << std::endl; + d_out->lemma(lem); } } } diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 22406adef..85d2754a8 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -294,18 +294,31 @@ private: EqualityStatus getEqualityStatus(TNode a, TNode b) override; private: + /** SAT-context-dependent information about an equivalence class */ class EqcInfo { public: EqcInfo( context::Context* c ); ~EqcInfo(){} - //constant in this eqc + /** + * If non-null, this is a term x from this eq class such that str.len( x ) + * occurs as a term in this SAT context. + */ context::CDO< Node > d_length_term; + /** + * If non-null, this is a term x from this eq class such that str.code( x ) + * occurs as a term in this SAT context. + */ + context::CDO d_code_term; context::CDO< unsigned > d_cardinality_lem_k; - // 1 = added length lemma context::CDO< Node > d_normalized_length; }; /** map from representatives to information necessary for equivalence classes */ std::map< Node, EqcInfo* > d_eqc_info; + /** + * Get the above information for equivalence class eqc. If doMake is true, + * we construct a new information class if one does not exist. The term eqc + * should currently be a representative of the equality engine of this class. + */ EqcInfo * getOrMakeEqcInfo( Node eqc, bool doMake = true ); //maintain which concat terms have the length lemma instantiated NodeNodeMap d_proxy_var; @@ -315,6 +328,8 @@ private: private: //any non-reduced extended functions exist context::CDO< bool > d_has_extf; + /** have we asserted any str.code terms? */ + bool d_has_str_code; // static information about extf class ExtfInfo { public: @@ -448,7 +463,24 @@ private: void addToExplanation(Node a, Node b, std::vector& exp); void addToExplanation(Node lit, std::vector& exp); - // register term + /** Register term + * + * This performs SAT-context-independent registration for a term n, which + * may cause lemmas to be sent on the output channel that involve + * "initial refinement lemmas" for n. This includes introducing proxy + * variables for string terms and asserting that str.code terms are within + * proper bounds. + * + * Effort is one of the following (TODO make enum #1881): + * 0 : upon preregistration or internal assertion + * 1 : upon occurrence in length term + * 2 : before normal form computation + * 3 : called on normal form terms + * + * Based on the strategy, we may choose to add these initial refinement + * lemmas at one of the following efforts, where if it is not the given + * effort, the call to this method does nothing. + */ void registerTerm(Node n, int effort); // send lemma void sendInference(std::vector& exp, diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 1c885434c..cfd0f6e73 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -1224,6 +1224,10 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } else if(node.getKind() == kind::STRING_IN_REGEXP) { retNode = rewriteMembership(node); } + else if (node.getKind() == STRING_CODE) + { + retNode = rewriteStringCode(node); + } Trace("strings-postrewrite") << "Strings::postRewrite returning " << retNode << std::endl; if( orig!=retNode ){ @@ -2258,6 +2262,30 @@ Node TheoryStringsRewriter::rewritePrefixSuffix(Node n) return retNode; } +Node TheoryStringsRewriter::rewriteStringCode(Node n) +{ + Assert(n.getKind() == kind::STRING_CODE); + if (n[0].isConst()) + { + CVC4::String s = n[0].getConst(); + Node ret; + if (s.size() == 1) + { + std::vector vec = s.getVec(); + Assert(vec.size() == 1); + ret = NodeManager::currentNM()->mkConst( + Rational(CVC4::String::convertUnsignedIntToCode(vec[0]))); + } + else + { + ret = NodeManager::currentNM()->mkConst(Rational(-1)); + } + return returnRewrite(n, ret, "code-eval"); + } + + return n; +} + void TheoryStringsRewriter::getConcat( Node n, std::vector< Node >& c ) { if( n.getKind()==kind::STRING_CONCAT || n.getKind()==kind::REGEXP_CONCAT ){ for( unsigned i=0; iintegerType(); diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp index 9aaad522a..4cbda5147 100644 --- a/src/util/regexp.cpp +++ b/src/util/regexp.cpp @@ -32,6 +32,32 @@ namespace CVC4 { static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values."); +unsigned String::convertCharToUnsignedInt(unsigned char c) +{ + return convertCodeToUnsignedInt(static_cast(c)); +} +unsigned char String::convertUnsignedIntToChar(unsigned i) +{ + Assert(i < num_codes()); + return static_cast(convertUnsignedIntToCode(i)); +} +bool String::isPrintable(unsigned i) +{ + Assert(i < num_codes()); + unsigned char c = convertUnsignedIntToChar(i); + return (c >= ' ' && c <= '~'); +} +unsigned String::convertCodeToUnsignedInt(unsigned c) +{ + Assert(c < num_codes()); + return (c < start_code() ? c + num_codes() : c) - start_code(); +} +unsigned String::convertUnsignedIntToCode(unsigned i) +{ + Assert(i < num_codes()); + return (i + start_code()) % num_codes(); +} + int String::cmp(const String &y) const { if (size() != y.size()) { return size() < y.size() ? -1 : 1; diff --git a/src/util/regexp.h b/src/util/regexp.h index 42d232259..c8b491475 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -28,21 +28,43 @@ namespace CVC4 { +/** The CVC4 string class + * + * This data structure is the domain of values for the string type. It can also + * be used as a generic utility for representing strings. + */ class CVC4_PUBLIC String { public: - static unsigned convertCharToUnsignedInt(unsigned char c) { - unsigned i = c; - i = i + 191; - return (i >= 256 ? i - 256 : i); - } - static unsigned char convertUnsignedIntToChar(unsigned i) { - unsigned ii = i + 65; - return (unsigned char)(ii >= 256 ? ii - 256 : ii); - } - static bool isPrintable(unsigned i) { - unsigned char c = convertUnsignedIntToChar(i); - return (c >= ' ' && c <= '~'); // isprint( (int)c ); - } + /** + * The start ASCII code. In our string representation below, we represent + * characters using a vector d_vec of unsigned integers. We refer to this as + * the "internal representation" for the string. + * + * We make unsigned integer 0 correspond to the 65th character ("A") in the + * ASCII alphabet to make models intuitive. In particular, say if we have + * a set of string variables that are distinct but otherwise unconstrained, + * then the model may assign them "A", "B", "C", ... + */ + static inline unsigned start_code() { return 65; } + /** + * This is the cardinality of the alphabet that is representable by this + * class. Notice that this must be greater than or equal to the cardinality + * of the alphabet that the string theory reasons about. + */ + static inline unsigned num_codes() { return 256; } + /** + * Convert unsigned char to the unsigned used in the internal representation + * in d_vec below. + */ + static unsigned convertCharToUnsignedInt(unsigned char c); + /** Convert the internal unsigned to a unsigned char. */ + static unsigned char convertUnsignedIntToChar(unsigned i); + /** Does the internal unsigned correspond to a printable character? */ + static bool isPrintable(unsigned i); + /** get the internal unsigned for ASCII code c. */ + static unsigned convertCodeToUnsignedInt(unsigned c); + /** get the ASCII code number that internal unsigned i corresponds to. */ + static unsigned convertUnsignedIntToCode(unsigned i); /** constructors for String * diff --git a/test/regress/Makefile.tests b/test/regress/Makefile.tests index 0a0732753..8584eeca9 100644 --- a/test/regress/Makefile.tests +++ b/test/regress/Makefile.tests @@ -1410,6 +1410,7 @@ REG1_TESTS = \ regress1/strings/cmu-5042-0707-2.smt2 \ regress1/strings/cmu-inc-nlpp-071516.smt2 \ regress1/strings/cmu-substr-rw.smt2 \ + regress1/strings/code-sequence.smt2 \ regress1/strings/crash-1019.smt2 \ regress1/strings/csp-prefix-exp-bug.smt2 \ regress1/strings/double-replace.smt2 \ @@ -1452,6 +1453,10 @@ REG1_TESTS = \ regress1/strings/string-unsound-sem.smt2 \ regress1/strings/strings-index-empty.smt2 \ regress1/strings/strip-endpt-sound.smt2 \ + regress1/strings/str-code-sat.smt2 \ + regress1/strings/str-code-unsat.smt2 \ + regress1/strings/str-code-unsat-2.smt2 \ + regress1/strings/str-code-unsat-3.smt2 \ regress1/strings/substr001.smt2 \ regress1/strings/type002.smt2 \ regress1/strings/type003.smt2 \ diff --git a/test/regress/regress1/strings/code-sequence.smt2 b/test/regress/regress1/strings/code-sequence.smt2 new file mode 100644 index 000000000..2654017d4 --- /dev/null +++ b/test/regress/regress1/strings/code-sequence.smt2 @@ -0,0 +1,14 @@ +(set-logic SLIA) +(set-option :strings-exp true) +(set-option :strings-fmf true) +(set-option :fmf-bound true) +(set-info :status sat) +(declare-fun x () String) +(assert (forall ((n Int)) (=> (and (<= 0 n) (< n (str.len x))) +(and +(<= 60 (str.code (str.at x n))) +(<= (str.code (str.at x n)) 90) +(< (+ 1 (str.code (str.at x (- n 1)))) (str.code (str.at x n))) +)))) +(assert (> (str.len x) 3)) +(check-sat) diff --git a/test/regress/regress1/strings/str-code-sat.smt2 b/test/regress/regress1/strings/str-code-sat.smt2 new file mode 100644 index 000000000..1acc091c1 --- /dev/null +++ b/test/regress/regress1/strings/str-code-sat.smt2 @@ -0,0 +1,24 @@ +(set-logic QF_SLIA) +(set-info :status sat) +(declare-fun x () String) +(declare-fun y () String) +(declare-fun z () String) +(declare-fun w () String) + +(assert (>= (str.code x) 65)) +(assert (<= (str.code x) 75)) + +(assert (>= (str.code y) 65)) +(assert (<= (str.code y) 75)) + +(assert (>= (str.code z) 65)) +(assert (<= (str.code z) 75)) + +(assert (= (str.len w) 1)) + +(assert (= (+ (str.code x) (str.code y)) 140)) +(assert (= (+ (str.code x) (str.code z)) 141)) + +(assert (distinct x y z w "A" "B" "C" "D" "AA")) + +(check-sat) diff --git a/test/regress/regress1/strings/str-code-unsat-2.smt2 b/test/regress/regress1/strings/str-code-unsat-2.smt2 new file mode 100644 index 000000000..38116061e --- /dev/null +++ b/test/regress/regress1/strings/str-code-unsat-2.smt2 @@ -0,0 +1,6 @@ +(set-logic QF_SLIA) +(set-info :status unsat) +(declare-fun x () String) +(assert (= (str.len x) 1)) +(assert (or (< (str.code x) 0) (> (str.code x) 10000000000000000000000000000))) +(check-sat) diff --git a/test/regress/regress1/strings/str-code-unsat-3.smt2 b/test/regress/regress1/strings/str-code-unsat-3.smt2 new file mode 100644 index 000000000..fa34785c2 --- /dev/null +++ b/test/regress/regress1/strings/str-code-unsat-3.smt2 @@ -0,0 +1,21 @@ +(set-logic QF_SLIA) +(set-info :status unsat) +(declare-fun x () String) +(declare-fun y () String) +(declare-fun z () String) + +(assert (>= (str.code x) 65)) +(assert (<= (str.code x) 75)) + +(assert (>= (str.code y) 65)) +(assert (<= (str.code y) 75)) + +(assert (>= (str.code z) 65)) +(assert (<= (str.code z) 75)) + +(assert (= (+ (str.code x) (str.code y)) 140)) +(assert (= (+ (str.code x) (str.code z)) 141)) + +(assert (distinct x y z "B" "C" "D" "E")) + +(check-sat) diff --git a/test/regress/regress1/strings/str-code-unsat.smt2 b/test/regress/regress1/strings/str-code-unsat.smt2 new file mode 100644 index 000000000..6a3e9062b --- /dev/null +++ b/test/regress/regress1/strings/str-code-unsat.smt2 @@ -0,0 +1,21 @@ +(set-logic QF_SLIA) +(set-info :status unsat) +(declare-fun x () String) +(declare-fun y () String) +(declare-fun z () String) + +(assert (>= (str.code x) 65)) +(assert (<= (str.code x) 75)) + +(assert (>= (str.code y) 65)) +(assert (<= (str.code y) 75)) + +(assert (>= (str.code z) 65)) +(assert (<= (str.code z) 75)) + +(assert (= (+ (str.code x) (str.code y)) 140)) +(assert (= (+ (str.code x) (str.code z)) 140)) + +(assert (distinct x y z)) + +(check-sat) -- 2.30.2