From: Andrew Reynolds Date: Thu, 2 Jun 2022 20:02:55 +0000 (-0500) Subject: Preparation for SEQ_NTH applied to strings (#8779) X-Git-Tag: cvc5-1.0.1~75 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e83a270a02e236ddc25cf82a3ee0ba073ea8fe77;p=cvc5.git Preparation for SEQ_NTH applied to strings (#8779) No behavior changes in this commit for current main. --- diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp index 3b154ea93..6a7b8ccb6 100644 --- a/src/printer/smt2/smt2_printer.cpp +++ b/src/printer/smt2/smt2_printer.cpp @@ -1265,6 +1265,7 @@ std::string Smt2Printer::smtKindString(Kind k, Variant v) case kind::STRING_STOI: return "str.to_int"; case kind::STRING_IN_REGEXP: return "str.in_re"; case kind::STRING_TO_REGEXP: return "str.to_re"; + case kind::STRING_UNIT: return "str.unit"; case kind::REGEXP_NONE: return "re.none"; case kind::REGEXP_ALL: return "re.all"; case kind::REGEXP_ALLCHAR: return "re.allchar"; diff --git a/src/theory/evaluator.cpp b/src/theory/evaluator.cpp index 82c1038a6..54f845b44 100644 --- a/src/theory/evaluator.cpp +++ b/src/theory/evaluator.cpp @@ -610,7 +610,7 @@ EvalResult Evaluator::evalInternal( Integer i = results[currNode[1]].d_rat.getNumerator(); if (i.strictlyNegative() || i >= s_len) { - results[currNode] = EvalResult(s); + results[currNode] = EvalResult(Rational(-1)); } else { diff --git a/src/theory/strings/array_core_solver.cpp b/src/theory/strings/array_core_solver.cpp index 2dbc6838e..1a264d7c4 100644 --- a/src/theory/strings/array_core_solver.cpp +++ b/src/theory/strings/array_core_solver.cpp @@ -66,16 +66,20 @@ void ArrayCoreSolver::checkNth(const std::vector& nthTerms) std::vector extractTerms = d_esolver.getActive(STRING_SUBSTR); for (const Node& n : extractTerms) { - if (d_termReg.isHandledUpdate(n)) + if (d_termReg.isHandledUpdateOrSubstr(n)) { - // (seq.extract A i l) ^ (<= 0 i) ^ (< i (str.len A)) --> (seq.unit - // (seq.nth A i)) + // (seq.extract A i l) in terms: + // IF (<= 0 i) ^ (< i (str.len A)) + // THEN (seq.extract A i l) = (seq.unit (seq.nth A i)) + // ELSE (seq.extract A i l) = empty std::vector exp; Node cond1 = nm->mkNode(LEQ, nm->mkConstInt(Rational(0)), n[1]); Node cond2 = nm->mkNode(LT, n[1], nm->mkNode(STRING_LENGTH, n[0])); Node cond = nm->mkNode(AND, cond1, cond2); - Node body1 = nm->mkNode( - EQUAL, n, nm->mkNode(SEQ_UNIT, nm->mkNode(SEQ_NTH, n[0], n[1]))); + TypeNode tn = n.getType(); + Node nth = nm->mkNode(SEQ_NTH, n[0], n[1]); + Node unit = utils::mkUnit(tn, nth); + Node body1 = nm->mkNode(EQUAL, n, unit); Node body2 = nm->mkNode(EQUAL, n, Word::mkEmptyWord(n.getType())); Node lem = nm->mkNode(ITE, cond, body1, body2); sendInference(exp, lem, InferenceId::STRINGS_ARRAY_NTH_EXTRACT); diff --git a/src/theory/strings/array_solver.cpp b/src/theory/strings/array_solver.cpp index 0dfe2d180..9dd724cfa 100644 --- a/src/theory/strings/array_solver.cpp +++ b/src/theory/strings/array_solver.cpp @@ -20,6 +20,7 @@ #include "theory/strings/theory_strings_utils.h" #include "theory/strings/word.h" #include "util/rational.h" +#include "util/string.h" using namespace cvc5::context; using namespace cvc5::internal::kind; @@ -118,7 +119,7 @@ void ArraySolver::checkTerms(const std::set& termSet) Trace("seq-array-debug") << "check term " << t << "..." << std::endl; if (k == STRING_UPDATE) { - if (!d_termReg.isHandledUpdate(t)) + if (!d_termReg.isHandledUpdateOrSubstr(t)) { // not handled by procedure Trace("seq-array-debug") << "...unhandled" << std::endl; @@ -187,13 +188,14 @@ void ArraySolver::checkTerm(Node t, bool checkInv) Trace("seq-array-debug") << "...norm form size 1" << std::endl; // NOTE: could split on n=0 if needed, do not introduce ITE Kind ck = nf.d_nf[0].getKind(); + bool cIsConst = nf.d_nf[0].isConst(); // Note that (seq.unit c) is rewritten to CONST_SEQUENCE{c}, hence we // check two cases here. It is important for completeness of this schema // to handle this differently from STRINGS_ARRAY_UPDATE_CONCAT / // STRINGS_ARRAY_NTH_CONCAT. Otherwise we would conclude a trivial // equality when update/nth is applied to a constant of length one. - if (ck == SEQ_UNIT - || (ck == CONST_SEQUENCE && Word::getLength(nf.d_nf[0]) == 1)) + if (ck == SEQ_UNIT || ck == STRING_UNIT + || (cIsConst && Word::getLength(nf.d_nf[0]) == 1)) { Trace("seq-array-debug") << "...unit case" << std::endl; // do we know whether n = 0 ? @@ -215,10 +217,9 @@ void ArraySolver::checkTerm(Node t, bool checkInv) { Assert(k == SEQ_NTH); Node val; - if (ck == CONST_SEQUENCE) + if (cIsConst) { - const Sequence& seq = nf.d_nf[0].getConst(); - val = seq.getVec()[0]; + val = Word::getNth(nf.d_nf[0], 0); } else { @@ -239,7 +240,7 @@ void ArraySolver::checkTerm(Node t, bool checkInv) } return; } - else if (ck != CONST_SEQUENCE) + else if (!cIsConst) { if (k == STRING_UPDATE) { @@ -250,7 +251,7 @@ void ArraySolver::checkTerm(Node t, bool checkInv) NormalForm& nfSelf = d_csolver.getNormalForm(rself); if (nfSelf.d_nf.size() == 1) { - // otherwise, if the normal form is not a constant sequence, and we + // otherwise, if the normal form is not a constant word, and we // are an atomic update term, then this term will be given to the // core array solver. d_currTerms[k].push_back(t); @@ -260,7 +261,7 @@ void ArraySolver::checkTerm(Node t, bool checkInv) } else { - // if the normal form is a constant sequence, it is treated as a + // if the normal form is a constant word, it is treated as a // concatenation. We split per character and case split on whether the // nth/update falls on each character below, which must have a size // greater than one. @@ -305,10 +306,9 @@ void ArraySolver::checkTerm(Node t, bool checkInv) // an optimization to short cut introducing terms like // (seq.nth (seq.unit c) i), which by construction is only relevant in // the context where i = 0, hence we replace by c here. - else if (c.getKind() == CONST_SEQUENCE) + else if (c.isConst()) { - const Sequence& seq = c.getConst(); - if (seq.size() == 1) + if (Word::getLength(c) == 1) { if (k == STRING_UPDATE) { @@ -316,7 +316,7 @@ void ArraySolver::checkTerm(Node t, bool checkInv) } else { - cc = seq.getVec()[0]; + cc = Word::getNth(c, 0); } } } diff --git a/src/theory/strings/base_solver.cpp b/src/theory/strings/base_solver.cpp index 08f0cee77..9193551cf 100644 --- a/src/theory/strings/base_solver.cpp +++ b/src/theory/strings/base_solver.cpp @@ -23,6 +23,7 @@ #include "theory/strings/theory_strings_utils.h" #include "theory/strings/word.h" #include "util/rational.h" +#include "util/string.h" using namespace std; using namespace cvc5::context; @@ -100,14 +101,14 @@ void BaseSolver::checkInit() else { // should not have two constants in the same equivalence class - Assert(cval.getType().isSequence()); std::vector cchars = Word::getChars(cval); if (cchars.size() == 1) { Node oval = prev.isConst() ? n : prev; - Assert(oval.getKind() == SEQ_UNIT); + Assert(oval.getKind() == SEQ_UNIT + || oval.getKind() == STRING_UNIT); s = oval[0]; - t = cchars[0].getConst().getVec()[0]; + t = Word::getNth(cchars[0], 0); // oval is congruent (ignored) in this context d_congruent.insert(oval); } diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index a32e9a4df..baa08c1ca 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -1982,12 +1982,16 @@ void CoreSolver::processDeq(Node ni, Node nj) for (size_t i = 0; i < 2; i++) { NormalForm& nfc = i == 0 ? nfni : nfnj; - if (nfc.d_nf.size() == 0 || nfc.d_nf[0].getKind() != SEQ_UNIT) + if (nfc.d_nf.size() == 0) { // may need to look at the other side continue; } Node u = nfc.d_nf[0]; + if (u.getKind() != SEQ_UNIT && u.getKind() != STRING_UNIT) + { + continue; + } // if the other side is constant like NormalForm& nfo = i == 0 ? nfnj : nfni; if (nfo.d_nf.size() == 0 || !utils::isConstantLike(nfo.d_nf[0])) @@ -2007,11 +2011,11 @@ void CoreSolver::processDeq(Node ni, Node nj) break; } // get the element of the character - vc = vchars[0].getConst().getVec()[0]; + vc = Word::getNth(vchars[0], 0); } else { - Assert(v.getKind() == SEQ_UNIT); + Assert(v.getKind() == SEQ_UNIT || v.getKind() == STRING_UNIT); vc = v[0]; } Assert(u[0].getType() == vc.getType()); @@ -2027,6 +2031,7 @@ void CoreSolver::processDeq(Node ni, Node nj) Node deq = u.eqNode(v).notNode(); std::vector premises; premises.push_back(deq); + Assert(u[0].getType()==vc.getType()); Node conc = u[0].eqNode(vc).notNode(); d_im.sendInference(premises, conc, InferenceId::STRINGS_UNIT_INJ_DEQ, false, true); return; @@ -2455,6 +2460,7 @@ void CoreSolver::processDeqExtensionality(Node n1, Node n2) ss1 = nm->mkNode(SEQ_NTH, n1, k); ss2 = nm->mkNode(SEQ_NTH, n2, k); } + // disequality between nth/substr Node conc1 = ss1.eqNode(ss2).negate(); diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index 1d7cd69cc..d51f812d9 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -70,6 +70,7 @@ ExtfSolver::ExtfSolver(Env& env, d_extt.addFunctionKind(kind::STRING_TO_LOWER); d_extt.addFunctionKind(kind::STRING_TO_UPPER); d_extt.addFunctionKind(kind::STRING_REV); + d_extt.addFunctionKind(kind::STRING_UNIT); d_extt.addFunctionKind(kind::SEQ_UNIT); d_extt.addFunctionKind(kind::SEQ_NTH); @@ -150,8 +151,8 @@ bool ExtfSolver::doReduction(int effort, Node n) return false; } } - else if (k == SEQ_UNIT || k == STRING_IN_REGEXP || k == STRING_TO_CODE - || (k == STRING_CONTAINS && pol == 0)) + else if (k == SEQ_UNIT || k == STRING_UNIT || k == STRING_IN_REGEXP + || k == STRING_TO_CODE || (k == STRING_CONTAINS && pol == 0)) { // never necessary to reduce seq.unit. str.to_code or str.in_re here. // also, we do not reduce str.contains that are preregistered but not @@ -168,7 +169,7 @@ bool ExtfSolver::doReduction(int effort, Node n) return false; } else if ((k == STRING_UPDATE || k == STRING_SUBSTR) - && d_termReg.isHandledUpdate(n)) + && d_termReg.isHandledUpdateOrSubstr(n)) { // don't need to reduce certain seq.update // don't need to reduce certain seq.extract with length 1 diff --git a/src/theory/strings/proof_checker.cpp b/src/theory/strings/proof_checker.cpp index 0b7f84fc3..7b177241c 100644 --- a/src/theory/strings/proof_checker.cpp +++ b/src/theory/strings/proof_checker.cpp @@ -492,21 +492,20 @@ Node StringProofRuleChecker::checkInternal(PfRule id, Node t[2]; for (size_t i = 0; i < 2; i++) { - if (children[0][i].getKind() == SEQ_UNIT) + Node c = children[0][i]; + Kind k = c.getKind(); + if (k == SEQ_UNIT || k == STRING_UNIT) { - t[i] = children[0][i][0]; + t[i] = c[0]; } - else if (children[0][i].isConst()) + else if (c.isConst()) { // notice that Word::getChars is not the right call here, since it // gets a vector of sequences of length one. We actually need to - // extract the character, which is a sequence-specific operation. - const Sequence& sx = children[0][i].getConst(); - const std::vector& vec = sx.getVec(); - if (vec.size() == 1) + // extract the character. + if (Word::getLength(c) == 1) { - // the character of the single character sequence - t[i] = vec[0]; + t[i] = Word::getNth(c, 0); } } if (t[i].isNull()) diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp index e4d702905..42e70db54 100644 --- a/src/theory/strings/rewrites.cpp +++ b/src/theory/strings/rewrites.cpp @@ -228,6 +228,7 @@ const char* toString(Rewrite r) case Rewrite::CHARAT_ELIM: return "CHARAT_ELIM"; case Rewrite::SEQ_UNIT_EVAL: return "SEQ_UNIT_EVAL"; case Rewrite::SEQ_NTH_EVAL: return "SEQ_NTH_EVAL"; + case Rewrite::SEQ_NTH_EVAL_OOB: return "SEQ_NTH_EVAL_OOB"; case Rewrite::SEQ_NTH_EVAL_SYM: return "SEQ_NTH_EVAL_SYM"; default: return "?"; } diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h index 98a3bc866..b62f1f347 100644 --- a/src/theory/strings/rewrites.h +++ b/src/theory/strings/rewrites.h @@ -229,6 +229,7 @@ enum class Rewrite : uint32_t CHARAT_ELIM, SEQ_UNIT_EVAL, SEQ_NTH_EVAL, + SEQ_NTH_EVAL_OOB, SEQ_NTH_EVAL_SYM }; diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 3b3290b26..acdc43879 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -639,7 +639,7 @@ Node SequencesRewriter::rewriteLength(Node node) Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]); return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV); } - else if (nk0 == SEQ_UNIT) + else if (nk0 == SEQ_UNIT || nk0 == STRING_UNIT) { Node retNode = nm->mkConstInt(Rational(1)); return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT); @@ -1757,11 +1757,16 @@ Node SequencesRewriter::rewriteSeqNth(Node node) if (posInt.fitsUnsignedInt() && posInt < Integer(len)) { size_t pos = posInt.toUnsignedInt(); - std::vector elements = s.getConst().getVec(); - const Node& ret = elements[pos]; + Node ret = Word::getNth(s, pos); return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL); } } + if (s.getType().isString()) + { + NodeManager* nm = NodeManager::currentNM(); + Node ret = nm->mkConstInt(Rational(-1)); + return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL_OOB); + } } std::vector prefix, suffix; @@ -1769,12 +1774,16 @@ Node SequencesRewriter::rewriteSeqNth(Node node) if ((i.isConst() && i.getConst().isZero()) || d_stringsEntail.stripSymbolicLength(suffix, prefix, 1, i, true)) { - if (suffix.size() > 0 && suffix[0].getKind() == SEQ_UNIT) + if (suffix.size() > 0) { - // (seq.nth (seq.++ prefix (seq.unit x) suffix) n) ---> x - // if len(prefix) = n - Node ret = suffix[0][0]; - return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL_SYM); + if (suffix[0].getKind() == SEQ_UNIT) + { + // (seq.nth (seq.++ prefix (seq.unit x) suffix) n) ---> x + // if len(prefix) = n + Node ret = suffix[0][0]; + return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL_SYM); + } + // TODO: STRING_UNIT? } } @@ -3642,6 +3651,7 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype) Node SequencesRewriter::rewriteSeqUnit(Node node) { + Assert(node.getKind() == SEQ_UNIT); NodeManager* nm = NodeManager::currentNM(); if (node[0].isConst()) { diff --git a/src/theory/strings/strings_rewriter.cpp b/src/theory/strings/strings_rewriter.cpp index 5cc9484cb..056a43f03 100644 --- a/src/theory/strings/strings_rewriter.cpp +++ b/src/theory/strings/strings_rewriter.cpp @@ -73,6 +73,10 @@ RewriteResponse StringsRewriter::postRewrite(TNode node) { retNode = rewriteStringFromCode(node); } + else if (nk == STRING_UNIT) + { + retNode = rewriteStringUnit(node); + } else { return SequencesRewriter::postRewrite(node); @@ -326,6 +330,24 @@ Node StringsRewriter::rewriteStringIsDigit(Node n) return returnRewrite(n, retNode, Rewrite::IS_DIGIT_ELIM); } +Node StringsRewriter::rewriteStringUnit(Node n) +{ + Assert(n.getKind() == STRING_UNIT); + NodeManager* nm = NodeManager::currentNM(); + if (n[0].isConst()) + { + Integer i = n[0].getConst().getNumerator(); + Node ret; + if (i >= 0 && i < d_alphaCard) + { + std::vector svec = {i.toUnsignedInt()}; + ret = nm->mkConst(String(svec)); + return returnRewrite(n, ret, Rewrite::SEQ_UNIT_EVAL); + } + } + return n; +} + } // namespace strings } // namespace theory } // namespace cvc5::internal diff --git a/src/theory/strings/strings_rewriter.h b/src/theory/strings/strings_rewriter.h index 2f4e2b0ee..d7907afc2 100644 --- a/src/theory/strings/strings_rewriter.h +++ b/src/theory/strings/strings_rewriter.h @@ -102,6 +102,14 @@ class StringsRewriter : public SequencesRewriter */ Node rewriteStringIsDigit(Node n); + /** rewrite string unit + * + * This is the entry point for post-rewriting terms n of the form + * str.unit( t ) + * Returns the rewritten form of n. + */ + Node rewriteStringUnit(Node n); + private: /** The cardinality of the alphabet */ uint32_t d_alphaCard; diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp index 1e7da4c70..e30750474 100644 --- a/src/theory/strings/term_registry.cpp +++ b/src/theory/strings/term_registry.cpp @@ -77,6 +77,14 @@ uint32_t TermRegistry::getAlphabetCardinality() const { return d_alphaCard; } void TermRegistry::finishInit(InferenceManager* im) { d_im = im; } +Node mkCodeRange(Node t, uint32_t alphaCard) +{ + NodeManager* nm = NodeManager::currentNM(); + return nm->mkNode(AND, + nm->mkNode(GEQ, t, nm->mkConstInt(Rational(0))), + nm->mkNode(LT, t, nm->mkConstInt(Rational(alphaCard)))); +} + Node TermRegistry::eagerReduce(Node t, SkolemCache* sc, uint32_t alphaCard) { NodeManager* nm = NodeManager::currentNM(); @@ -88,10 +96,7 @@ Node TermRegistry::eagerReduce(Node t, SkolemCache* sc, uint32_t alphaCard) Node len = nm->mkNode(STRING_LENGTH, t[0]); Node code_len = len.eqNode(nm->mkConstInt(Rational(1))); Node code_eq_neg1 = t.eqNode(nm->mkConstInt(Rational(-1))); - Node code_range = - nm->mkNode(AND, - nm->mkNode(GEQ, t, nm->mkConstInt(Rational(0))), - nm->mkNode(LT, t, nm->mkConstInt(Rational(alphaCard)))); + Node code_range = mkCodeRange(t, alphaCard); lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1); } else if (tk == STRING_INDEXOF || tk == STRING_INDEXOF_RE) @@ -480,7 +485,7 @@ bool TermRegistry::hasStringCode() const { return d_hasStrCode; } bool TermRegistry::hasSeqUpdate() const { return d_hasSeqUpdate; } -bool TermRegistry::isHandledUpdate(Node n) +bool TermRegistry::isHandledUpdateOrSubstr(Node n) { Assert(n.getKind() == STRING_UPDATE || n.getKind() == STRING_SUBSTR); NodeManager* nm = NodeManager::currentNM(); diff --git a/src/theory/strings/term_registry.h b/src/theory/strings/term_registry.h index d6cb77636..8092b1b0a 100644 --- a/src/theory/strings/term_registry.h +++ b/src/theory/strings/term_registry.h @@ -164,8 +164,8 @@ class TermRegistry : protected EnvObj * @return true if any seq.nth or seq.update terms have been preregistered */ bool hasSeqUpdate() const; - /** is handled update */ - bool isHandledUpdate(Node n); + /** is handled update or substring */ + bool isHandledUpdateOrSubstr(Node n); /** get base */ Node getUpdateBase(Node n); //---------------------------- end queries diff --git a/src/theory/strings/theory_strings_type_rules.cpp b/src/theory/strings/theory_strings_type_rules.cpp index ae1f3613a..9ddc258a1 100644 --- a/src/theory/strings/theory_strings_type_rules.cpp +++ b/src/theory/strings/theory_strings_type_rules.cpp @@ -334,13 +334,13 @@ TypeNode SeqNthTypeRule::computeType(NodeManager* nodeManager, TNode n, bool check) { + Assert(n.getKind() == kind::SEQ_NTH); TypeNode t = n[0].getType(check); - if (check && !t.isSequence()) + if (check && !t.isStringLike()) { - throw TypeCheckingExceptionPrivate(n, "expecting a sequence in nth"); + throw TypeCheckingExceptionPrivate(n, + "expecting a string-like term in nth"); } - - TypeNode t1 = t.getSequenceElementType(); if (check) { TypeNode t2 = n[1].getType(check); @@ -350,7 +350,12 @@ TypeNode SeqNthTypeRule::computeType(NodeManager* nodeManager, n, "expecting an integer start term in nth"); } } - return t1; + if (t.isSequence()) + { + return t.getSequenceElementType(); + } + Assert(t.isString()); + return nodeManager->integerType(); } Cardinality SequenceProperties::computeCardinality(TypeNode type) diff --git a/test/regress/cli/regress0/seq/seq-nth-type-check.smt2 b/test/regress/cli/regress0/seq/seq-nth-type-check.smt2 index 3e9c38e7b..4c4587613 100644 --- a/test/regress/cli/regress0/seq/seq-nth-type-check.smt2 +++ b/test/regress/cli/regress0/seq/seq-nth-type-check.smt2 @@ -1,6 +1,6 @@ ; DISABLE-TESTER: dump -; EXPECT: expecting a sequence -; SCRUBBER: grep -o "expecting a sequence" +; EXPECT: expecting a string-like +; SCRUBBER: grep -o "expecting a string-like" ; EXIT: 1 (set-logic QF_SLIA) (declare-const i Int)