From 04154c08c1af81bb751376ae9379c071a95c5a3f Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 1 Jul 2020 08:46:07 -0500 Subject: [PATCH] Inferences and model construction taking into account seq.unit (#4607) Towards theory of sequences. This makes the strings solver handle seq.unit, which requires two new inferences and updates to its model construction. It also fixes a bug in the best content heuristic, which previously failed to update the best score. --- src/theory/strings/base_solver.cpp | 109 ++++++++++++++++---- src/theory/strings/core_solver.cpp | 11 +- src/theory/strings/extf_solver.cpp | 21 ++-- src/theory/strings/infer_info.cpp | 2 + src/theory/strings/infer_info.h | 4 + src/theory/strings/rewrites.cpp | 1 + src/theory/strings/rewrites.h | 1 + src/theory/strings/sequences_rewriter.cpp | 10 +- src/theory/strings/theory_strings.cpp | 18 +++- src/theory/strings/theory_strings_utils.cpp | 2 + src/theory/strings/theory_strings_utils.h | 9 ++ 11 files changed, 150 insertions(+), 38 deletions(-) diff --git a/src/theory/strings/base_solver.cpp b/src/theory/strings/base_solver.cpp index 1af6c89ca..952f26691 100644 --- a/src/theory/strings/base_solver.cpp +++ b/src/theory/strings/base_solver.cpp @@ -15,6 +15,7 @@ #include "theory/strings/base_solver.h" +#include "expr/sequence.h" #include "options/strings_options.h" #include "theory/strings/theory_strings_utils.h" #include "theory/strings/word.h" @@ -54,7 +55,7 @@ void BaseSolver::checkInit() if (!tn.isRegExp()) { Node emps; - if (tn.isString()) + if (tn.isStringLike()) { d_stringsEqc.push_back(eqc); emps = Word::mkEmptyWord(tn); @@ -64,19 +65,75 @@ void BaseSolver::checkInit() while (!eqc_i.isFinished()) { Node n = *eqc_i; - if (n.isConst()) + Kind k = n.getKind(); + // process constant-like terms + if (utils::isConstantLike(n)) { - d_eqcInfo[eqc].d_bestContent = n; - d_eqcInfo[eqc].d_base = n; - d_eqcInfo[eqc].d_exp = Node::null(); + Node prev = d_eqcInfo[eqc].d_bestContent; + if (!prev.isNull()) + { + // we have either (seq.unit x) = C, or (seq.unit x) = (seq.unit y) + // where C is a sequence constant. + Node cval = + prev.isConst() ? prev : (n.isConst() ? n : Node::null()); + std::vector exp; + exp.push_back(prev.eqNode(n)); + Node s, t; + if (cval.isNull()) + { + // injectivity of seq.unit + s = prev[0]; + t = n[0]; + } + else + { + // should not have two constants in the same equivalence class + Assert(cval.getType().isSequence()); + std::vector cchars = Word::getChars(cval); + if (cchars.size() == 1) + { + Node oval = prev.isConst() ? n : prev; + Assert(oval.getKind() == SEQ_UNIT); + s = oval[0]; + t = cchars[0] + .getConst() + .getSequence() + .getVec()[0]; + // oval is congruent (ignored) in this context + d_congruent.insert(oval); + } + else + { + // (seq.unit x) = C => false if |C| != 1. + d_im.sendInference( + exp, d_false, Inference::UNIT_CONST_CONFLICT); + return; + } + } + if (!d_state.areEqual(s, t)) + { + // (seq.unit x) = (seq.unit y) => x=y, or + // (seq.unit x) = (seq.unit c) => x=c + Assert(s.getType() == t.getType()); + d_im.sendInference(exp, s.eqNode(t), Inference::UNIT_INJ); + } + } + // update best content + if (prev.isNull() || n.isConst()) + { + d_eqcInfo[eqc].d_bestContent = n; + d_eqcInfo[eqc].d_bestScore = 0; + d_eqcInfo[eqc].d_base = n; + d_eqcInfo[eqc].d_exp = Node::null(); + } } - else if (tn.isInteger()) + if (tn.isInteger()) { // do nothing } + // process indexing else if (n.getNumChildren() > 0) { - Kind k = n.getKind(); if (k != EQUAL) { if (d_congruent.find(n) == d_congruent.end()) @@ -87,7 +144,7 @@ void BaseSolver::checkInit() { // check if we have inferred a new equality by removal of empty // components - if (n.getKind() == STRING_CONCAT && !d_state.areEqual(nc, n)) + if (k == STRING_CONCAT && !d_state.areEqual(nc, n)) { std::vector exp; size_t count[2] = {0, 0}; @@ -188,7 +245,7 @@ void BaseSolver::checkInit() } } } - else + else if (!n.isConst()) { if (d_congruent.find(n) == d_congruent.end()) { @@ -351,6 +408,7 @@ void BaseSolver::checkConstantEquivalenceClasses(TermIndex* ti, Node nct = utils::mkNConcat(vecnc, n.getType()); Assert(!nct.isConst()); bei.d_bestContent = nct; + bei.d_bestScore = contentSize; bei.d_base = n; if (!exp.empty()) { @@ -449,8 +507,26 @@ void BaseSolver::checkCardinalityType(TypeNode tn, std::vector >& cols, std::vector& lts) { + Trace("strings-card") << "Check cardinality (type " << tn << ")..." + << std::endl; NodeManager* nm = NodeManager::currentNM(); - Trace("strings-card") << "Check cardinality...." << std::endl; + uint32_t typeCardSize; + if (tn.isString()) // string-only + { + typeCardSize = d_cardSize; + } + else + { + Assert(tn.isSequence()); + TypeNode etn = tn.getSequenceElementType(); + if (etn.isInterpretedFinite()) + { + // infinite cardinality, we are fine + return; + } + // TODO (cvc4-projects #23): how to handle sequence for finite types? + return; + } // for each collection for (unsigned i = 0, csize = cols.size(); i < csize; ++i) { @@ -462,23 +538,18 @@ void BaseSolver::checkCardinalityType(TypeNode tn, // no restriction on sets in the partition of size 1 continue; } - if (!tn.isString()) - { - // TODO (cvc4-projects #23): how to handle sequence for finite types? - continue; - } // size > c^k unsigned card_need = 1; double curr = static_cast(cols[i].size()); - while (curr > d_cardSize) + while (curr > typeCardSize) { - curr = curr / static_cast(d_cardSize); + curr = curr / static_cast(typeCardSize); card_need++; } Trace("strings-card") << "Need length " << card_need - << " for this number of strings (where alphabet size is " << d_cardSize - << ")." << std::endl; + << " for this number of strings (where alphabet size is " + << typeCardSize << ") given type " << tn << "." << std::endl; // check if we need to split bool needsSplit = true; if (lr.isConst()) diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index 3bc953f2c..a38d16279 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -707,15 +707,17 @@ void CoreSolver::getNormalForms(Node eqc, while( !eqc_i.isFinished() ){ Node n = (*eqc_i); if( !d_bsolver.isCongruent(n) ){ - if (n.isConst() || n.getKind() == STRING_CONCAT) + Kind nk = n.getKind(); + bool isCLike = utils::isConstantLike(n); + if (isCLike || nk == STRING_CONCAT) { Trace("strings-process-debug") << "Get Normal Form : Process term " << n << " in eqc " << eqc << std::endl; NormalForm nf_curr; - if (n.isConst()) + if (isCLike) { nf_curr.init(n); } - else if (n.getKind() == STRING_CONCAT) + else if (nk == STRING_CONCAT) { // set the base to n, we construct the other portions of nf_curr in // the following. @@ -791,7 +793,8 @@ void CoreSolver::getNormalForms(Node eqc, } //if not equal to self std::vector& currv = nf_curr.d_nf; - if (currv.size() > 1 || (currv.size() == 1 && currv[0].isConst())) + if (currv.size() > 1 + || (currv.size() == 1 && utils::isConstantLike(currv[0]))) { // if in a build with assertions, check that normal form is acyclic if (Configuration::isAssertionBuild()) diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index 961d976cf..4ee2f4919 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -65,6 +65,7 @@ ExtfSolver::ExtfSolver(context::Context* c, d_extt.addFunctionKind(kind::STRING_TOLOWER); d_extt.addFunctionKind(kind::STRING_TOUPPER); d_extt.addFunctionKind(kind::STRING_REV); + d_extt.addFunctionKind(kind::SEQ_UNIT); d_true = NodeManager::currentNM()->mkConst(true); d_false = NodeManager::currentNM()->mkConst(false); @@ -138,16 +139,18 @@ bool ExtfSolver::doReduction(int effort, Node n) } } } - else + else if (k == STRING_SUBSTR) { - if (k == STRING_SUBSTR) - { - r_effort = 1; - } - else if (k != STRING_IN_REGEXP) - { - r_effort = 2; - } + r_effort = 1; + } + else if (k == SEQ_UNIT) + { + // never necessary to reduce seq.unit + return false; + } + else if (k != STRING_IN_REGEXP) + { + r_effort = 2; } if (effort != r_effort) { diff --git a/src/theory/strings/infer_info.cpp b/src/theory/strings/infer_info.cpp index 9640a57e2..c75e03440 100644 --- a/src/theory/strings/infer_info.cpp +++ b/src/theory/strings/infer_info.cpp @@ -26,6 +26,8 @@ const char* toString(Inference i) case Inference::I_CONST_MERGE: return "I_CONST_MERGE"; case Inference::I_CONST_CONFLICT: return "I_CONST_CONFLICT"; case Inference::I_NORM: return "I_NORM"; + case Inference::UNIT_INJ: return "UNIT_INJ"; + case Inference::UNIT_CONST_CONFLICT: return "UNIT_CONST_CONFLICT"; case Inference::CARD_SP: return "CARD_SP"; case Inference::CARDINALITY: return "CARDINALITY"; case Inference::I_CYCLE_E: return "I_CYCLE_E"; diff --git a/src/theory/strings/infer_info.h b/src/theory/strings/infer_info.h index 1e37dc5b0..2a42b9fab 100644 --- a/src/theory/strings/infer_info.h +++ b/src/theory/strings/infer_info.h @@ -59,6 +59,10 @@ enum class Inference : uint32_t // equal after e.g. removing strings that are currently empty. For example: // y = "" ^ z = "" => x ++ y = z ++ x I_NORM, + // injectivity of seq.unit + UNIT_INJ, + // unit constant conflict + UNIT_CONST_CONFLICT, // A split due to cardinality CARD_SP, // The cardinality inference for strings, see Liang et al CAV 2014. diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp index 09d496f06..68b510de6 100644 --- a/src/theory/strings/rewrites.cpp +++ b/src/theory/strings/rewrites.cpp @@ -202,6 +202,7 @@ const char* toString(Rewrite r) case Rewrite::LEN_CONCAT: return "LEN_CONCAT"; case Rewrite::LEN_REPL_INV: return "LEN_REPL_INV"; case Rewrite::LEN_CONV_INV: return "LEN_CONV_INV"; + case Rewrite::LEN_SEQ_UNIT: return "LEN_SEQ_UNIT"; case Rewrite::CHARAT_ELIM: return "CHARAT_ELIM"; case Rewrite::SEQ_UNIT_EVAL: return "SEQ_UNIT_EVAL"; default: return "?"; diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h index a2cf5efc8..ccbdbc0cd 100644 --- a/src/theory/strings/rewrites.h +++ b/src/theory/strings/rewrites.h @@ -205,6 +205,7 @@ enum class Rewrite : uint32_t LEN_CONCAT, LEN_REPL_INV, LEN_CONV_INV, + LEN_SEQ_UNIT, CHARAT_ELIM, SEQ_UNIT_EVAL }; diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 20b892d0f..bb0fa1d97 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -576,6 +576,11 @@ Node SequencesRewriter::rewriteLength(Node node) Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]); return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV); } + else if (nk0 == SEQ_UNIT) + { + Node retNode = nm->mkConst(Rational(1)); + return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT); + } return node; } @@ -1836,6 +1841,7 @@ Node SequencesRewriter::rewriteContains(Node node) nb << emp.eqNode(t); for (const Node& c : vec) { + Assert(c.getType() == t.getType()); nb << c.eqNode(t); } @@ -3171,7 +3177,7 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype) Assert(ratLen.getDenominator() == 1); Integer intLen = ratLen.getNumerator(); uint32_t u = intLen.getUnsignedInt(); - if (stype.isString()) + if (stype.isString()) // string-only { res = nm->mkConst(String(std::string(u, 'A'))); } @@ -3233,7 +3239,7 @@ Node SequencesRewriter::rewriteSeqUnit(Node node) { std::vector seq; seq.push_back(node[0].toExpr()); - TypeNode stype = nm->mkSequenceType(node[0].getType()); + TypeNode stype = node[0].getType(); Node ret = nm->mkConst(ExprSequence(stype.toType(), seq)); return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL); } diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index ae8b3b682..d1b18df13 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -363,9 +363,15 @@ bool TheoryStrings::collectModelInfoType( NormalForm& nfe = d_csolver->getNormalForm(eqc); if (nfe.d_nf.size() == 1) { + // is it an equivalence class with a seq.unit term? + if (nfe.d_nf[0].getKind() == SEQ_UNIT) + { + pure_eq_assign[eqc] = nfe.d_nf[0]; + Trace("strings-model") << "(unit: " << nfe.d_nf[0] << ") "; + } // does it have a code and the length of these equivalence classes are // one? - if (d_termReg.hasStringCode() && lts_values[i] == d_one) + else if (d_termReg.hasStringCode() && lts_values[i] == d_one) { EqcInfo* eip = d_state.getOrMakeEqcInfo(eqc, false); if (eip && !eip->d_codeTerm.get().isNull()) @@ -419,7 +425,7 @@ bool TheoryStrings::collectModelInfoType( std::unique_ptr sel; Trace("strings-model") << "Cardinality of alphabet is " << utils::getAlphabetCardinality() << std::endl; - if (tn.isString()) + if (tn.isString()) // string-only { sel.reset(new StringEnumLen( currLen, currLen, utils::getAlphabetCardinality())); @@ -535,7 +541,6 @@ bool TheoryStrings::collectModelInfoType( nc.push_back(r.isConst() ? r : processed[r]); } Node cc = utils::mkNConcat(nc, tn); - Assert(cc.isConst()); Trace("strings-model") << "*** Determined constant " << cc << " for " << rn << std::endl; processed[rn] = cc; @@ -543,12 +548,17 @@ bool TheoryStrings::collectModelInfoType( { // this should never happen due to the model soundness argument // for strings - Unreachable() << "TheoryStrings::collectModelInfoType: " "Inconsistent equality (unprocessed eqc)" << std::endl; return false; } + else if (!cc.isConst()) + { + // the value may be specified by seq.unit components, ensure this + // is marked as the skeleton for constructing values in this class. + m->assertSkeleton(cc); + } } } //Trace("strings-model") << "String Model : Assigned." << std::endl; diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index f77684309..3cf14fead 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -274,6 +274,8 @@ std::pair > collectEmptyEqs(Node x) allEmptyEqs, std::vector(emptyNodes.begin(), emptyNodes.end())); } +bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; } + bool isUnboundedWildcard(const std::vector& rs, size_t start) { size_t i = start; diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h index e0480f32b..803a5ffea 100644 --- a/src/theory/strings/theory_strings_utils.h +++ b/src/theory/strings/theory_strings_utils.h @@ -147,6 +147,15 @@ Node mkSubstrChain(Node base, */ std::pair > collectEmptyEqs(Node x); +/** + * Return if a string-like term n is "constant-like", that is, either a + * constant string/sequence, or an application of seq.unit. + * + * @param n The string-like term + * @return true if n is constant-like. + */ +bool isConstantLike(Node n); + /** * Given a vector of regular expression nodes and a start index that points to * a wildcard, returns true if the wildcard is unbounded (i.e. it is followed -- 2.30.2