From fa43b8842bd9082401cbf85c8de213a8ae152603 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 22 Jun 2022 19:45:52 -0500 Subject: [PATCH] Fix handling of injectivity for str.unit (#8899) Fixes #8890. --- src/theory/inference_id.cpp | 2 + src/theory/inference_id.h | 5 +++ src/theory/strings/base_solver.cpp | 43 +++++++++++++++++-- src/theory/strings/base_solver.h | 5 +++ src/theory/strings/core_solver.cpp | 6 ++- src/theory/strings/infer_proof_cons.cpp | 1 + src/theory/strings/term_registry.cpp | 12 +----- src/theory/strings/theory_strings_utils.cpp | 8 ++++ src/theory/strings/theory_strings_utils.h | 5 +++ test/regress/cli/CMakeLists.txt | 1 + .../regress1/strings/issue8890-inj-oob.smt2 | 9 ++++ 11 files changed, 81 insertions(+), 16 deletions(-) create mode 100644 test/regress/cli/regress1/strings/issue8890-inj-oob.smt2 diff --git a/src/theory/inference_id.cpp b/src/theory/inference_id.cpp index 55cb72451..495971dc9 100644 --- a/src/theory/inference_id.cpp +++ b/src/theory/inference_id.cpp @@ -387,6 +387,8 @@ const char* toString(InferenceId i) case InferenceId::STRINGS_I_CONST_CONFLICT: return "STRINGS_I_CONST_CONFLICT"; case InferenceId::STRINGS_I_NORM: return "STRINGS_I_NORM"; + case InferenceId::STRINGS_UNIT_SPLIT: return "STRINGS_UNIT_SPLIT"; + case InferenceId::STRINGS_UNIT_INJ_OOB: return "STRINGS_UNIT_INJ_OOB"; case InferenceId::STRINGS_UNIT_INJ: return "STRINGS_UNIT_INJ"; case InferenceId::STRINGS_UNIT_CONST_CONFLICT: return "STRINGS_UNIT_CONST_CONFLICT"; diff --git a/src/theory/inference_id.h b/src/theory/inference_id.h index 3a6452e45..4166560e3 100644 --- a/src/theory/inference_id.h +++ b/src/theory/inference_id.h @@ -561,6 +561,11 @@ enum class InferenceId // equal after e.g. removing strings that are currently empty. For example: // y = "" ^ z = "" => x ++ y = z ++ x STRINGS_I_NORM, + // split between the argument of two equated str.unit terms + STRINGS_UNIT_SPLIT, + // a code point must be out of bounds due to (str.unit x) = (str.unit y) and + // x != y. + STRINGS_UNIT_INJ_OOB, // injectivity of seq.unit // (seq.unit x) = (seq.unit y) => x=y, or // (seq.unit x) = (seq.unit c) => x=c diff --git a/src/theory/strings/base_solver.cpp b/src/theory/strings/base_solver.cpp index 9193551cf..998926802 100644 --- a/src/theory/strings/base_solver.cpp +++ b/src/theory/strings/base_solver.cpp @@ -37,7 +37,12 @@ BaseSolver::BaseSolver(Env& env, SolverState& s, InferenceManager& im, TermRegistry& tr) - : EnvObj(env), d_state(s), d_im(im), d_termReg(tr), d_congruent(context()) + : EnvObj(env), + d_state(s), + d_im(im), + d_termReg(tr), + d_congruent(context()), + d_strUnitOobEq(userContext()) { d_false = NodeManager::currentNM()->mkConst(false); d_cardSize = options().strings.stringsAlphaCard; @@ -56,6 +61,7 @@ void BaseSolver::checkInit() // count of congruent, non-congruent per operator (independent of type), // for debugging. std::map> congruentCount; + NodeManager* nm = NodeManager::currentNM(); eq::EqualityEngine* ee = d_state.getEqualityEngine(); eq::EqClassesIterator eqcs_i = eq::EqClassesIterator(ee); while (!eqcs_i.isFinished()) @@ -122,10 +128,39 @@ void BaseSolver::checkInit() } if (!d_state.areEqual(s, t)) { - // (seq.unit x) = (seq.unit y) => x=y, or - // (seq.unit x) = (seq.unit c) => x=c Assert(s.getType() == t.getType()); - d_im.sendInference(exp, s.eqNode(t), InferenceId::STRINGS_UNIT_INJ); + Node eq = s.eqNode(t); + if (n.getType().isString()) + { + // String unit is not injective, due to invalid code points. + // We do an inference scheme in two parts. + // for (str.unit x), (str.unit y): x = y or x != y + if (!d_state.areDisequal(s, t)) + { + d_im.sendSplit(s, t, InferenceId::STRINGS_UNIT_SPLIT); + } + else if (d_strUnitOobEq.find(eq) == d_strUnitOobEq.end()) + { + // cache that we have performed this inference + Node eqSym = t.eqNode(s); + d_strUnitOobEq.insert(eq); + d_strUnitOobEq.insert(eqSym); + exp.push_back(eq.notNode()); + // (str.unit x) = (str.unit y) ^ x != y => + // x or y is not a valid code point + Node scr = utils::mkCodeRange(s, d_cardSize); + Node tcr = utils::mkCodeRange(t, d_cardSize); + Node conc = nm->mkNode(OR, scr.notNode(), tcr.notNode()); + d_im.sendInference( + exp, conc, InferenceId::STRINGS_UNIT_INJ_OOB); + } + } + else + { + // (seq.unit x) = (seq.unit y) => x=y, or + // (seq.unit x) = (seq.unit c) => x=c + d_im.sendInference(exp, eq, InferenceId::STRINGS_UNIT_INJ); + } } } // update best content diff --git a/src/theory/strings/base_solver.h b/src/theory/strings/base_solver.h index 69c14d347..f687ed0eb 100644 --- a/src/theory/strings/base_solver.h +++ b/src/theory/strings/base_solver.h @@ -233,6 +233,11 @@ class BaseSolver : protected EnvObj * various inference schemas implemented by this class. */ NodeSet d_congruent; + /** + * Set of equalities that we have applied STRINGS_UNIT_INJ_OOB to + * in the current user context + */ + NodeSet d_strUnitOobEq; /** * Maps equivalence classes to their info, see description of `BaseEqcInfo` * for more information. diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index 8c74af50f..99aac3815 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -1276,7 +1276,10 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Trace("strings-solve-debug") << "Process " << x << " ... " << y << std::endl; - if (x == y) + // require checking equal here, usually x == y, but this also holds the + // case of (str.unit a) and (str.unit b) for distinct a, b, where we do + // not want to unify these terms here. + if (d_state.areEqual(x, y)) { // The normal forms have the same term at the current position. We just // continue with the next index. By construction of the normal forms, we @@ -1290,7 +1293,6 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, index++; continue; } - Assert(!d_state.areEqual(x, y)); std::vector lenExp; Node xLenTerm = d_state.getLength(x, lenExp); diff --git a/src/theory/strings/infer_proof_cons.cpp b/src/theory/strings/infer_proof_cons.cpp index 212bc56d3..567f61f26 100644 --- a/src/theory/strings/infer_proof_cons.cpp +++ b/src/theory/strings/infer_proof_cons.cpp @@ -664,6 +664,7 @@ void InferProofCons::convert(InferenceId infer, case InferenceId::STRINGS_DEQ_STRINGS_EQ: case InferenceId::STRINGS_DEQ_LENS_EQ: case InferenceId::STRINGS_DEQ_LENGTH_SP: + case InferenceId::STRINGS_UNIT_SPLIT: { if (conc.getKind() != OR) { diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp index b85e3460b..4ca2ac5f7 100644 --- a/src/theory/strings/term_registry.cpp +++ b/src/theory/strings/term_registry.cpp @@ -77,14 +77,6 @@ uint32_t TermRegistry::getAlphabetCardinality() const { return d_alphaCard; } void TermRegistry::finishInit(InferenceManager* im) { d_im = im; } -Node mkCodeRange(Node t, uint32_t alphaCard) -{ - NodeManager* nm = NodeManager::currentNM(); - return nm->mkNode(AND, - nm->mkNode(GEQ, t, nm->mkConstInt(Rational(0))), - nm->mkNode(LT, t, nm->mkConstInt(Rational(alphaCard)))); -} - Node TermRegistry::eagerReduce(Node t, SkolemCache* sc, uint32_t alphaCard) { NodeManager* nm = NodeManager::currentNM(); @@ -96,7 +88,7 @@ Node TermRegistry::eagerReduce(Node t, SkolemCache* sc, uint32_t alphaCard) Node len = nm->mkNode(STRING_LENGTH, t[0]); Node code_len = len.eqNode(nm->mkConstInt(Rational(1))); Node code_eq_neg1 = t.eqNode(nm->mkConstInt(Rational(-1))); - Node code_range = mkCodeRange(t, alphaCard); + Node code_range = utils::mkCodeRange(t, alphaCard); lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1); } else if (tk == SEQ_NTH) @@ -111,7 +103,7 @@ Node TermRegistry::eagerReduce(Node t, SkolemCache* sc, uint32_t alphaCard) Node c2 = nm->mkNode(GT, nm->mkNode(STRING_LENGTH, s), n); // check whether this application of seq.nth is defined. Node cond = nm->mkNode(AND, c1, c2); - Node code_range = mkCodeRange(t, alphaCard); + Node code_range = utils::mkCodeRange(t, alphaCard); // the lemma for `seq.nth` lemma = nm->mkNode(ITE, cond, code_range, t.eqNode(nm->mkConstInt(Rational(-1)))); // IF: n >=0 AND n < len( s ) diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index e692e93c4..a3ab50919 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -449,6 +449,14 @@ Node mkAbstractStringValueForLength(Node n, Node len, size_t id) return quantifiers::mkNamedQuant(WITNESS, bvl, pred, ss.str()); } +Node mkCodeRange(Node t, uint32_t alphaCard) +{ + NodeManager* nm = NodeManager::currentNM(); + return nm->mkNode(AND, + nm->mkNode(GEQ, t, nm->mkConstInt(Rational(0))), + nm->mkNode(LT, t, nm->mkConstInt(Rational(alphaCard)))); +} + } // namespace utils } // namespace strings } // namespace theory diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h index e6671d87e..14172a3df 100644 --- a/src/theory/strings/theory_strings_utils.h +++ b/src/theory/strings/theory_strings_utils.h @@ -217,6 +217,11 @@ Node mkForallInternal(Node bvl, Node body); */ Node mkAbstractStringValueForLength(Node n, Node len, size_t id); +/** + * Make the formula (and (>= t 0) (< t alphaCard)). + */ +Node mkCodeRange(Node t, uint32_t alphaCard); + } // namespace utils } // namespace strings } // namespace theory diff --git a/test/regress/cli/CMakeLists.txt b/test/regress/cli/CMakeLists.txt index 45eb213b8..30f1d7177 100644 --- a/test/regress/cli/CMakeLists.txt +++ b/test/regress/cli/CMakeLists.txt @@ -2622,6 +2622,7 @@ set(regress_1_tests regress1/strings/issue8094-witness-model.smt2 regress1/strings/issue8347-has-skolem.smt2 regress1/strings/issue8434-nterm-str-rw.smt2 + regress1/strings/issue8890-inj-oob.smt2 regress1/strings/kaluza-fl.smt2 regress1/strings/loop002.smt2 regress1/strings/loop003.smt2 diff --git a/test/regress/cli/regress1/strings/issue8890-inj-oob.smt2 b/test/regress/cli/regress1/strings/issue8890-inj-oob.smt2 new file mode 100644 index 000000000..4705a91e5 --- /dev/null +++ b/test/regress/cli/regress1/strings/issue8890-inj-oob.smt2 @@ -0,0 +1,9 @@ +; COMMAND-LINE: --strings-exp --mbqi --strings-fmf +; EXPECT: sat +(set-logic ALL) +(declare-fun b (Int) Bool) +(declare-fun v () String) +(declare-fun a () String) +(assert (or (b 0) (= 0 (ite (= 1 (str.len v)) 1 0)))) +(assert (forall ((e String) (va Int)) (or (= va 0) (distinct 0 (ite (str.prefixof "-" a) (str.to_int (str.substr v 1 (str.len e))) (str.to_int e)))))) +(check-sat) -- 2.30.2