From: Andrew Reynolds Date: Wed, 3 Nov 2021 16:12:18 +0000 (-0500) Subject: Formalize more string skolems (#7554) X-Git-Tag: cvc5-1.0.0~902 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d527b3f2501410770a76977efa180009e06ea172;p=cvc5.git Formalize more string skolems (#7554) This properly formalizes all string skolems used in reduction and preprocessing. This avoids proof checking failures due to non-deterministism when checking steps for these modules. Fixes cvc5/cvc5-projects#334. Fixes cvc5/cvc5-projects#331. --- diff --git a/src/expr/skolem_manager.cpp b/src/expr/skolem_manager.cpp index 80626fbc6..e8651ea75 100644 --- a/src/expr/skolem_manager.cpp +++ b/src/expr/skolem_manager.cpp @@ -53,6 +53,18 @@ const char* toString(SkolemFunId id) case SkolemFunId::SELECTOR_WRONG: return "SELECTOR_WRONG"; case SkolemFunId::SHARED_SELECTOR: return "SHARED_SELECTOR"; case SkolemFunId::SEQ_NTH_OOB: return "SEQ_NTH_OOB"; + case SkolemFunId::STRINGS_NUM_OCCUR: return "STRINGS_NUM_OCCUR"; + case SkolemFunId::STRINGS_OCCUR_INDEX: return "STRINGS_OCCUR_INDEX"; + case SkolemFunId::STRINGS_OCCUR_LEN: return "STRINGS_OCCUR_LEN"; + case SkolemFunId::STRINGS_DEQ_DIFF: return "STRINGS_DEQ_DIFF"; + case SkolemFunId::STRINGS_REPLACE_ALL_RESULT: + return "STRINGS_REPLACE_ALL_RESULT"; + case SkolemFunId::STRINGS_ITOS_RESULT: return "STRINGS_ITOS_RESULT"; + case SkolemFunId::STRINGS_STOI_RESULT: return "STRINGS_STOI_RESULT"; + case SkolemFunId::STRINGS_STOI_NON_DIGIT: return "STRINGS_STOI_NON_DIGIT"; + case SkolemFunId::SK_FIRST_MATCH_PRE: return "SK_FIRST_MATCH_PRE"; + case SkolemFunId::SK_FIRST_MATCH: return "SK_FIRST_MATCH"; + case SkolemFunId::SK_FIRST_MATCH_POST: return "SK_FIRST_MATCH_POST"; case SkolemFunId::RE_UNFOLD_POS_COMPONENT: return "RE_UNFOLD_POS_COMPONENT"; case SkolemFunId::BAGS_MAP_PREIMAGE: return "BAGS_MAP_PREIMAGE"; case SkolemFunId::BAGS_MAP_SUM: return "BAGS_MAP_SUM"; @@ -221,8 +233,14 @@ Node SkolemManager::mkSkolemFunction(SkolemFunId id, const std::vector& cacheVals, int flags) { - Assert(cacheVals.size() > 1); - Node cacheVal = NodeManager::currentNM()->mkNode(SEXPR, cacheVals); + Node cacheVal; + // use null node if cacheVals is empty + if (!cacheVals.empty()) + { + cacheVal = cacheVals.size() == 1 + ? cacheVals[0] + : NodeManager::currentNM()->mkNode(SEXPR, cacheVals); + } return mkSkolemFunction(id, tn, cacheVal, flags); } diff --git a/src/expr/skolem_manager.h b/src/expr/skolem_manager.h index 90e935767..16cab62ca 100644 --- a/src/expr/skolem_manager.h +++ b/src/expr/skolem_manager.h @@ -44,6 +44,67 @@ enum class SkolemFunId SHARED_SELECTOR, /** an application of seq.nth that is out of bounds */ SEQ_NTH_OOB, + //----- string skolems are cached based on two strings (a, b) + /** exists k. ( b occurs k times in a ) */ + STRINGS_NUM_OCCUR, + /** For function k: Int -> Int + * exists k. + * forall 0 <= x <= n, + * k(x) is the end index of the x^th occurrence of b in a + * where n is the number of occurrences of b in a, and k(0)=0. + */ + STRINGS_OCCUR_INDEX, + /** + * For function k: Int -> Int + * exists k. + * forall 0 <= x < n, + * k(x) is the length of the x^th occurrence of b in a (excluding + * matches of empty strings) + * where b is a regular expression, n is the number of occurrences of b + * in a, and k(0)=0. + */ + STRINGS_OCCUR_LEN, + /** + * Diff index for disequalities a != b => substr(a,k,1) != substr(b,k,1) + */ + STRINGS_DEQ_DIFF, + //----- + /** + * A function used to define intermediate results of str.replace_all and + * str.replace_re_all applications. + */ + STRINGS_REPLACE_ALL_RESULT, + /** + * A function used to define intermediate results of str.from_int + * applications. + */ + STRINGS_ITOS_RESULT, + /** + * A function used to define intermediate results of str.to_int + * applications. + */ + STRINGS_STOI_RESULT, + /** + * An index containing a non-digit in a string, used when (str.to_int a) = -1. + */ + STRINGS_STOI_NON_DIGIT, + /** + * For sequence a and regular expression b, + * in_re(a, re.++(_*, b, _*)) => + * exists k_pre, k_match, k_post. + * a = k_pre ++ k_match ++ k_post ^ + * len(k_pre) = indexof_re(x, y, 0) ^ + * (forall l. 0 < l < len(k_match) => + * ~in_re(substr(k_match, 0, l), r)) ^ + * in_re(k_match, b) + * + * k_pre is the prefix before the first, shortest match of b in a. k_match + * is the substring of a matched by b. It is either empty or there is no + * shorter string that matches b. + */ + SK_FIRST_MATCH_PRE, + SK_FIRST_MATCH, + SK_FIRST_MATCH_POST, /** * Regular expression unfold component: if (str.in_re t R), where R is * (re.++ r0 ... rn), then the RE_UNFOLD_POS_COMPONENT{t,R,i} is a string diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index b5e15a129..0b7294f2c 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -2463,8 +2463,7 @@ void CoreSolver::processDeqExtensionality(Node n1, Node n2) NodeManager* nm = NodeManager::currentNM(); SkolemCache* sc = d_termReg.getSkolemCache(); TypeNode intType = nm->integerType(); - Node k = sc->mkTypedSkolemCached( - intType, n1, n2, SkolemCache::SK_DEQ_DIFF, "diff"); + Node k = sc->mkSkolemFun(SkolemFunId::STRINGS_DEQ_DIFF, intType, n1, n2); Node deq = eq.negate(); Node ss1, ss2; if (n1.getType().isString()) diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp index 0d2e9cacb..60e6b6a01 100644 --- a/src/theory/strings/skolem_cache.cpp +++ b/src/theory/strings/skolem_cache.cpp @@ -119,7 +119,6 @@ Node SkolemCache::mkTypedSkolemCached( case SK_ID_V_SPT_REV: case SK_ID_VC_SPT: case SK_ID_VC_SPT_REV: - case SK_FIRST_CTN_POST: case SK_ID_C_SPT: case SK_ID_C_SPT_REV: case SK_ID_DC_SPT: @@ -127,12 +126,11 @@ Node SkolemCache::mkTypedSkolemCached( case SK_ID_DEQ_X: case SK_ID_DEQ_Y: case SK_FIRST_CTN_PRE: + case SK_FIRST_CTN_POST: case SK_PREFIX: case SK_SUFFIX_REM: Unhandled() << "Expected to eliminate Skolem ID " << id << std::endl; break; - case SK_NUM_OCCUR: - case SK_OCCUR_INDEX: default: { Notice() << "Don't know how to handle Skolem ID " << id << std::endl; @@ -318,6 +316,25 @@ Node SkolemCache::mkLengthVar(Node t) return bvm->mkBoundVar(t, intType); } +Node SkolemCache::mkSkolemFun(SkolemFunId id, TypeNode tn, Node a, Node b) +{ + std::vector cacheVals; + for (size_t i = 0; i < 2; i++) + { + Node n = i == 0 ? a : b; + if (!n.isNull()) + { + n = d_rr != nullptr ? d_rr->rewrite(n) : n; + cacheVals.push_back(n); + } + } + NodeManager* nm = NodeManager::currentNM(); + SkolemManager* sm = nm->getSkolemManager(); + Node k = sm->mkSkolemFunction(id, tn, cacheVals); + d_allSkolems.insert(k); + return k; +} + } // namespace strings } // namespace theory } // namespace cvc5 diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h index 2b714781b..ff74e5c0d 100644 --- a/src/theory/strings/skolem_cache.h +++ b/src/theory/strings/skolem_cache.h @@ -58,6 +58,10 @@ class SkolemCache * preconditions below, e.g. where we are considering a' ++ a = b' ++ b. * * All skolems assume a and b are strings unless otherwise stated. + * + * Notice that these identifiers are each syntax sugar for constructing a + * purification skolem. It is required for the purposes of proof checking + * that this only results in calls to SkolemManager::mkPurifySkolem. */ enum SkolemId { @@ -107,21 +111,6 @@ class SkolemCache // of b in a as the witness for contains( a, b ). SK_FIRST_CTN_PRE, SK_FIRST_CTN_POST, - // For sequence a and regular expression b, - // in_re(a, re.++(_*, b, _*)) => - // exists k_pre, k_match, k_post. - // a = k_pre ++ k_match ++ k_post ^ - // len(k_pre) = indexof_re(x, y, 0) ^ - // (forall l. 0 < l < len(k_match) => - // ~in_re(substr(k_match, 0, l), r)) ^ - // in_re(k_match, b) - // - // k_pre is the prefix before the first, shortest match of b in a. k_match - // is the substring of a matched by b. It is either empty or there is no - // shorter string that matches b. - SK_FIRST_MATCH_PRE, - SK_FIRST_MATCH, - SK_FIRST_MATCH_POST, // For integer b, // len( a ) > b => // exists k. a = k ++ a' ^ len( k ) = b @@ -129,33 +118,7 @@ class SkolemCache // For integer b, // b > 0 => // exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 ) - SK_SUFFIX_REM, - // --------------- integer skolems - // exists k. ( b occurs k times in a ) - SK_NUM_OCCUR, - // --------------- function skolems - // For function k: Int -> Int - // exists k. - // forall 0 <= x <= n, - // k(x) is the end index of the x^th occurrence of b in a - // where n is the number of occurrences of b in a, and k(0)=0. - SK_OCCUR_INDEX, - // For function k: Int -> Int - // exists k. - // forall 0 <= x < n, - // k(x) is the length of the x^th occurrence of b in a (excluding - // matches of empty strings) - // where b is a regular expression, n is the number of occurrences of b - // in a, and k(0)=0. - SK_OCCUR_LEN, - // For function k: ((Seq U) x Int) -> U - // exists k. - // forall s, n. - // k(s, n) is some undefined value of sort U - SK_NTH, - // Diff index for disequalities - // a != b => substr(a,k,1) != substr(b,k,1) - SK_DEQ_DIFF + SK_SUFFIX_REM }; /** * Returns a skolem of type string that is cached for (a,b,id) and has @@ -201,6 +164,19 @@ class SkolemCache * that could be matched by r. */ static Node mkLengthVar(Node t); + /** + * Make skolem function, possibly normalizing based on the rewriter of this + * class. This method should be used whenever it is not possible to define + * a Skolem identifier that amounts to purification of a term. + * + * Notice that this method is not static or constant since it tracks the + * Skolem we construct (in d_allSkolems), which is used for finite model + * finding. + */ + Node mkSkolemFun(SkolemFunId id, + TypeNode tn, + Node a = Node::null(), + Node b = Node::null()); private: /** diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 9a793e14f..e0f0a8dac 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -998,21 +998,20 @@ TrustNode TheoryStrings::ppRewrite(TNode atom, std::vector& lems) } if (atom.getKind() == STRING_FROM_CODE) { - // str.from_code(t) ---> - // witness k. ite(0 <= t < |A|, t = str.to_code(k), k = "") + // str.from_code(t) ---> ite(0 <= t < |A|, t = str.to_code(k), k = "") NodeManager* nm = NodeManager::currentNM(); + SkolemCache* sc = d_termReg.getSkolemCache(); + Node k = sc->mkSkolemCached(atom, SkolemCache::SK_PURIFY, "kFromCode"); Node t = atom[0]; Node card = nm->mkConst(Rational(d_termReg.getAlphabetCardinality())); Node cond = nm->mkNode(AND, nm->mkNode(LEQ, d_zero, t), nm->mkNode(LT, t, card)); - Node v = nm->mkBoundVar(nm->stringType()); Node emp = Word::mkEmptyWord(atom.getType()); Node pred = nm->mkNode( - ITE, cond, t.eqNode(nm->mkNode(STRING_TO_CODE, v)), v.eqNode(emp)); - SkolemManager* sm = nm->getSkolemManager(); - Node ret = sm->mkSkolem(v, pred, "kFromCode"); - lems.push_back(SkolemLemma(ret, nullptr)); - return TrustNode::mkTrustRewrite(atom, ret, nullptr); + ITE, cond, t.eqNode(nm->mkNode(STRING_TO_CODE, k)), k.eqNode(emp)); + TrustNode tnk = TrustNode::mkTrustLemma(pred); + lems.push_back(SkolemLemma(tnk, k)); + return TrustNode::mkTrustRewrite(atom, k, nullptr); } TrustNode ret; Node atomRet = atom; diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 550a9af8b..31f43d565 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -53,7 +53,6 @@ Node StringsPreprocess::reduce(Node t, << "StringsPreprocess::reduce: " << t << std::endl; Node retNode = t; NodeManager* nm = NodeManager::currentNM(); - SkolemManager* sm = nm->getSkolemManager(); Node zero = nm->mkConst(Rational(0)); Node one = nm->mkConst(Rational(1)); Node negOne = nm->mkConst(Rational(-1)); @@ -343,8 +342,8 @@ Node StringsPreprocess::reduce(Node t, std::vector conc; std::vector< TypeNode > argTypes; argTypes.push_back(nm->integerType()); - Node u = - sm->mkDummySkolem("U", nm->mkFunctionType(argTypes, nm->integerType())); + TypeNode itosResType = nm->mkFunctionType(argTypes, nm->integerType()); + Node u = sc->mkSkolemFun(SkolemFunId::STRINGS_ITOS_RESULT, itosResType, t); Node lem = nm->mkNode(GEQ, leni, one); conc.push_back(lem); @@ -423,7 +422,8 @@ Node StringsPreprocess::reduce(Node t, Node emp = Word::mkEmptyWord(s.getType()); Node sEmpty = s.eqNode(emp); - Node k = sm->mkDummySkolem("k", nm->integerType()); + Node k = sc->mkSkolemFun( + SkolemFunId::STRINGS_STOI_NON_DIGIT, nm->integerType(), t); Node kc1 = nm->mkNode(GEQ, k, zero); Node kc2 = nm->mkNode(LT, k, lens); Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0"))); @@ -439,8 +439,9 @@ Node StringsPreprocess::reduce(Node t, std::vector conc2; std::vector< TypeNode > argTypes; argTypes.push_back(nm->integerType()); + TypeNode stoiResultType = nm->mkFunctionType(argTypes, nm->integerType()); Node u = - sm->mkDummySkolem("U", nm->mkFunctionType(argTypes, nm->integerType())); + sc->mkSkolemFun(SkolemFunId::STRINGS_STOI_RESULT, stoiResultType, t); lem = stoit.eqNode(nm->mkNode(APPLY_UF, u, lens)); conc2.push_back(lem); @@ -605,15 +606,15 @@ Node StringsPreprocess::reduce(Node t, Node z = t[2]; Node rpaw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpaw"); - Node numOcc = sc->mkTypedSkolemCached( - nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc"); + Node numOcc = sc->mkSkolemFun( + SkolemFunId::STRINGS_NUM_OCCUR, nm->integerType(), x, y); std::vector argTypes; argTypes.push_back(nm->integerType()); - Node us = - sm->mkDummySkolem("Us", nm->mkFunctionType(argTypes, nm->stringType())); + TypeNode raResultType = nm->mkFunctionType(argTypes, t.getType()); + Node us = sc->mkSkolemFun( + SkolemFunId::STRINGS_REPLACE_ALL_RESULT, raResultType, t); TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType()); - Node uf = sc->mkTypedSkolemCached( - ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf"); + Node uf = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_INDEX, ufType, x, y); Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); Node usno = nm->mkNode(APPLY_UF, us, numOcc); @@ -691,12 +692,10 @@ Node StringsPreprocess::reduce(Node t, // k = z ++ x Node res1 = k.eqNode(nm->mkNode(STRING_CONCAT, z, x)); - Node k1 = - sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_PRE, "rre_pre"); - Node k2 = - sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match"); - Node k3 = - sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post"); + TypeNode ktype = t.getType(); + Node k1 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH_PRE, ktype, x, y); + Node k2 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH, ktype, x, y); + Node k3 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH_POST, ktype, x, y); Node k2Len = nm->mkNode(STRING_LENGTH, k2); // x = k1 ++ k2 ++ k3 Node split = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3)); @@ -740,17 +739,16 @@ Node StringsPreprocess::reduce(Node t, Node z = t[2]; Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k"); - Node numOcc = sc->mkTypedSkolemCached( - nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc"); + Node numOcc = sc->mkSkolemFun( + SkolemFunId::STRINGS_NUM_OCCUR, nm->integerType(), x, y); std::vector argTypes; argTypes.push_back(nm->integerType()); - Node us = - sm->mkDummySkolem("Us", nm->mkFunctionType(argTypes, t.getType())); + TypeNode raResultType = nm->mkFunctionType(argTypes, t.getType()); + Node us = sc->mkSkolemFun( + SkolemFunId::STRINGS_REPLACE_ALL_RESULT, raResultType, t); TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType()); - Node uf = sc->mkTypedSkolemCached( - ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf"); - Node ul = - sc->mkTypedSkolemCached(ufType, x, y, SkolemCache::SK_OCCUR_LEN, "Ul"); + Node uf = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_INDEX, ufType, x, y); + Node ul = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_LEN, ufType, x, y); Node emp = Word::mkEmptyWord(t.getType()); diff --git a/test/api/CMakeLists.txt b/test/api/CMakeLists.txt index f6c1cf8df..56adf73e7 100644 --- a/test/api/CMakeLists.txt +++ b/test/api/CMakeLists.txt @@ -53,6 +53,7 @@ cvc5_add_api_test(issue5074) cvc5_add_api_test(issue4889) cvc5_add_api_test(issue6111) cvc5_add_api_test(proj-issue306) +cvc5_add_api_test(proj-issue334) # if we've built using libedit, then we want the interactive shell tests if (USE_EDITLINE) diff --git a/test/api/proj-issue306.cpp b/test/api/proj-issue306.cpp index 664536a0b..35ecda567 100644 --- a/test/api/proj-issue306.cpp +++ b/test/api/proj-issue306.cpp @@ -1,3 +1,19 @@ +/****************************************************************************** + * Top contributors (to current version): + * Andrew Reynolds + * + * This file is part of the cvc5 project. + * + * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS + * in the top-level source directory and their institutional affiliations. + * All rights reserved. See the file COPYING in the top-level source + * directory for licensing information. + * **************************************************************************** + * + * Test for project issue #306 + * + */ + #include "api/cpp/cvc5.h" using namespace cvc5::api; diff --git a/test/api/proj-issue334.cpp b/test/api/proj-issue334.cpp new file mode 100644 index 000000000..bbb7f1a46 --- /dev/null +++ b/test/api/proj-issue334.cpp @@ -0,0 +1,36 @@ +/****************************************************************************** + * Top contributors (to current version): + * Andrew Reynolds + * + * This file is part of the cvc5 project. + * + * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS + * in the top-level source directory and their institutional affiliations. + * All rights reserved. See the file COPYING in the top-level source + * directory for licensing information. + * **************************************************************************** + * + * Test for project issue #334 + * + */ + +#include "api/cpp/cvc5.h" + +using namespace cvc5::api; + +int main(void) +{ + Solver slv; + slv.setOption("produce-unsat-cores", "true"); + Sort s1 = slv.mkBitVectorSort(1); + Sort s2 = slv.mkFloatingPointSort(8, 24); + Term val = slv.mkBitVector(32, "10000000110010111010111011000101", 2); + Term t1 = slv.mkFloatingPoint(8, 24, val); + Term t2 = slv.mkConst(s1); + Term t4 = slv.mkTerm(Kind::BITVECTOR_TO_NAT, t2); + Term t5 = slv.mkTerm(Kind::STRING_FROM_CODE, t4); + Term t6 = slv.simplify(t5); + Term t7 = slv.mkTerm(Kind::STRING_LEQ, t5, t6); + slv.assertFormula(t7); + slv.simplify(t1); +} diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 7839dc7f2..da7c40bc2 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -2321,6 +2321,7 @@ set(regress_1_tests regress1/strings/policy_variable.smt2 regress1/strings/pre_ctn_no_skolem_share.smt2 regress1/strings/proj254-re-elim-agg.smt2 + regress1/strings/proj-issue331.smt2 regress1/strings/query4674.smt2 regress1/strings/query8485.smt2 regress1/strings/re-all-char-hard.smt2 diff --git a/test/regress/regress1/strings/proj-issue331.smt2 b/test/regress/regress1/strings/proj-issue331.smt2 new file mode 100644 index 000000000..e993419f1 --- /dev/null +++ b/test/regress/regress1/strings/proj-issue331.smt2 @@ -0,0 +1,7 @@ +; COMMAND-LINE: --strings-exp +; EXPECT: unsat +(set-logic ALL) +(set-option :check-proofs true) +(set-info :status unsat) +(declare-const x Int) +(check-sat-assuming ((str.< (str.++ (str.from_int x) (str.replace_re (str.from_int x) re.none (str.from_int 0)) (str.replace_re (str.from_int x) re.none (str.from_int 0))) (str.from_int x))))