From: Andrew Reynolds Date: Fri, 17 Jul 2020 14:40:56 +0000 (-0500) Subject: (proof-new) Updates to strings core solver (#4642) X-Git-Tag: cvc5-1.0.0~3092 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0988217562006d3f59e01dc261f39121df6d75f5;p=cvc5.git (proof-new) Updates to strings core solver (#4642) This updates the core strings solver in preparation for proofs. The main changes include: The addition of the strings PfRule enum values. The definition of a "getConclusion" static method, used by the core solver, and to be used in the future by the strings proof checker. This includes several optional forms of lemmas, which are added as options in this PR. Major simplifications to our inference schemas for disequality handling (a STRING_DECOMPOSE inference rule). Note this is the only significant intended behavioral change in this PR. Minor updates to the form of inferences send to inference manager, for instance to orient equalities in the expected way, and to reorder assumptions. These changes are done for uniformity and make the strings proof reconstruction from inference steps easier. --- diff --git a/src/expr/proof_rule.cpp b/src/expr/proof_rule.cpp index c9250f9ea..c51bfb3c7 100644 --- a/src/expr/proof_rule.cpp +++ b/src/expr/proof_rule.cpp @@ -93,6 +93,26 @@ const char* toString(PfRule id) case PfRule::EXISTS_INTRO: return "EXISTS_INTRO"; case PfRule::SKOLEMIZE: return "SKOLEMIZE"; case PfRule::INSTANTIATE: return "INSTANTIATE"; + //================================================= String rules + case PfRule::CONCAT_EQ: return "CONCAT_EQ"; + case PfRule::CONCAT_UNIFY: return "CONCAT_UNIFY"; + case PfRule::CONCAT_CONFLICT: return "CONCAT_CONFLICT"; + case PfRule::CONCAT_SPLIT: return "CONCAT_SPLIT"; + case PfRule::CONCAT_CSPLIT: return "CONCAT_CSPLIT"; + case PfRule::CONCAT_LPROP: return "CONCAT_LPROP"; + case PfRule::CONCAT_CPROP: return "CONCAT_CPROP"; + case PfRule::STRING_DECOMPOSE: return "STRING_DECOMPOSE"; + case PfRule::STRING_LENGTH_POS: return "STRING_LENGTH_POS"; + case PfRule::STRING_LENGTH_NON_EMPTY: return "STRING_LENGTH_NON_EMPTY"; + case PfRule::STRING_REDUCTION: return "STRING_REDUCTION"; + case PfRule::STRING_EAGER_REDUCTION: return "STRING_EAGER_REDUCTION"; + case PfRule::RE_INTER: return "RE_INTER"; + case PfRule::RE_UNFOLD_POS: return "RE_UNFOLD_POS"; + case PfRule::RE_UNFOLD_NEG: return "RE_UNFOLD_NEG"; + case PfRule::RE_UNFOLD_NEG_CONCAT_FIXED: + return "RE_UNFOLD_NEG_CONCAT_FIXED"; + case PfRule::RE_ELIM: return "RE_ELIM"; + case PfRule::STRING_CODE_INJ: return "STRING_CODE_INJ"; //================================================= Arith rules case PfRule::ARITH_SCALE_SUM_UPPER_BOUNDS: return "ARITH_SCALE_SUM_UPPER_BOUNDS"; case PfRule::ARITH_TRICHOTOMY: return "ARITH_TRICHOTOMY"; diff --git a/src/expr/proof_rule.h b/src/expr/proof_rule.h index e7464dd24..c76e907c7 100644 --- a/src/expr/proof_rule.h +++ b/src/expr/proof_rule.h @@ -529,6 +529,225 @@ enum class PfRule : uint32_t // sigma maps x1 ... xn to t1 ... tn. INSTANTIATE, + //================================================= String rules + //======================== Core solver + // ======== Concat eq + // Children: (P1:(= (str.++ t1 ... tn t) (str.++ t1 ... tn s))) + // Arguments: (b), indicating if reverse direction + // --------------------- + // Conclusion: (= t s) + // + // Notice that t or s may be empty, in which case they are implicit in the + // concatenation above. For example, if + // P1 concludes (= x (str.++ x z)), then + // (CONCAT_EQ P1 :args false) concludes (= "" z) + // + // Also note that constants are split, such that if + // P1 concludes (= (str.++ "abc" x) (str.++ "a" y)), then + // (CONCAT_EQ P1 :args false) concludes (= (str.++ "bc" x) y) + // This splitting is done only for constants such that Word::splitConstant + // returns non-null. + CONCAT_EQ, + // ======== Concat unify + // Children: (P1:(= (str.++ t1 t2) (str.++ s1 s2)), + // P2:(= (str.len t1) (str.len s1))) + // Arguments: (b), indicating if reverse direction + // --------------------- + // Conclusion: (= t1 s1) + CONCAT_UNIFY, + // ======== Concat conflict + // Children: (P1:(= (str.++ c1 t) (str.++ c2 s))) + // Arguments: (b), indicating if reverse direction + // --------------------- + // Conclusion: false + // Where c1, c2 are constants such that Word::splitConstant(c1,c2,index,b) + // is null, in other words, neither is a prefix of the other. + CONCAT_CONFLICT, + // ======== Concat split + // Children: (P1:(= (str.++ t1 t2) (str.++ s1 s2)), + // P2:(not (= (str.len t1) (str.len s1)))) + // Arguments: (false) + // --------------------- + // Conclusion: (or (= t1 (str.++ s1 r_t)) (= s1 (str.++ t1 r_s))) + // where + // r_t = (witness ((z String)) (= z (suf t1 (str.len s1)))), + // r_s = (witness ((z String)) (= z (suf s1 (str.len t1)))). + // + // or the reverse form of the above: + // + // Children: (P1:(= (str.++ t1 t2) (str.++ s1 s2)), + // P2:(not (= (str.len t2) (str.len s2)))) + // Arguments: (true) + // --------------------- + // Conclusion: (or (= t2 (str.++ r_t s2)) (= s2 (str.++ r_s t2))) + // where + // r_t = (witness ((z String)) (= z (pre t2 (- (str.len t2) (str.len + // s2))))), r_s = (witness ((z String)) (= z (pre s2 (- (str.len s2) + // (str.len t2))))). + // + // Above, (suf x n) is shorthand for (str.substr x n (- (str.len x) n)) and + // (pre x n) is shorthand for (str.substr x 0 n). + CONCAT_SPLIT, + // ======== Concat constant split + // Children: (P1:(= (str.++ t1 t2) (str.++ c s2)), + // P2:(not (= (str.len t1) 0))) + // Arguments: (false) + // --------------------- + // Conclusion: (= t1 (str.++ c r)) + // where + // r = (witness ((z String)) (= z (suf t1 1))). + // + // or the reverse form of the above: + // + // Children: (P1:(= (str.++ t1 t2) (str.++ s1 c)), + // P2:(not (= (str.len t2) 0))) + // Arguments: (true) + // --------------------- + // Conclusion: (= t2 (str.++ r c)) + // where + // r = (witness ((z String)) (= z (pre t2 (- (str.len t2) 1)))). + CONCAT_CSPLIT, + // ======== Concat length propagate + // Children: (P1:(= (str.++ t1 t2) (str.++ s1 s2)), + // P2:(> (str.len t1) (str.len s1))) + // Arguments: (false) + // --------------------- + // Conclusion: (= t1 (str.++ s1 r_t)) + // where + // r_t = (witness ((z String)) (= z (suf t1 (str.len s1)))) + // + // or the reverse form of the above: + // + // Children: (P1:(= (str.++ t1 t2) (str.++ s1 s2)), + // P2:(> (str.len t2) (str.len s2))) + // Arguments: (false) + // --------------------- + // Conclusion: (= t2 (str.++ r_t s2)) + // where + // r_t = (witness ((z String)) (= z (pre t2 (- (str.len t2) (str.len + // s2))))). + CONCAT_LPROP, + // ======== Concat constant propagate + // Children: (P1:(= (str.++ t1 w1 t2) (str.++ w2 s)), + // P2:(not (= (str.len t1) 0))) + // Arguments: (false) + // --------------------- + // Conclusion: (= t1 (str.++ w3 r)) + // where + // w1, w2, w3, w4 are words, + // w3 is (pre w2 p), + // w4 is (suf w2 p), + // p = Word::overlap((suf w2 1), w1), + // r = (witness ((z String)) (= z (suf t1 (str.len w3)))). + // In other words, w4 is the largest suffix of (suf w2 1) that can contain a + // prefix of w1; since t1 is non-empty, w3 must therefore be contained in t1. + // + // or the reverse form of the above: + // + // Children: (P1:(= (str.++ t1 w1 t2) (str.++ s w2)), + // P2:(not (= (str.len t2) 0))) + // Arguments: (true) + // --------------------- + // Conclusion: (= t2 (str.++ r w3)) + // where + // w1, w2, w3, w4 are words, + // w3 is (suf w2 (- (str.len w2) p)), + // w4 is (pre w2 (- (str.len w2) p)), + // p = Word::roverlap((pre w2 (- (str.len w2) 1)), w1), + // r = (witness ((z String)) (= z (pre t2 (- (str.len t2) (str.len w3))))). + // In other words, w4 is the largest prefix of (pre w2 (- (str.len w2) 1)) + // that can contain a suffix of w1; since t2 is non-empty, w3 must therefore + // be contained in t2. + CONCAT_CPROP, + // ======== String decompose + // Children: (P1: (>= (str.len t) n) + // Arguments: (false) + // --------------------- + // Conclusion: (and (= t (str.++ w1 w2)) (= (str.len w1) n)) + // or + // Children: (P1: (>= (str.len t) n) + // Arguments: (true) + // --------------------- + // Conclusion: (and (= t (str.++ w1 w2)) (= (str.len w2) n)) + // where + // w1 is (witness ((z String)) (= z (pre t n))) + // w2 is (witness ((z String)) (= z (suf t n))) + STRING_DECOMPOSE, + // ======== Length positive + // Children: none + // Arguments: (t) + // --------------------- + // Conclusion: (or (and (= (str.len t) 0) (= t "")) (> (str.len t 0))) + STRING_LENGTH_POS, + // ======== Length non-empty + // Children: (P1:(not (= t ""))) + // Arguments: none + // --------------------- + // Conclusion: (not (= (str.len t) 0)) + STRING_LENGTH_NON_EMPTY, + //======================== Extended functions + // ======== Reduction + // Children: none + // Arguments: (t) + // --------------------- + // Conclusion: (and R (= t w)) + // where w = strings::StringsPreprocess::reduce(t, R, ...). + // In other words, R is the reduction predicate for extended term t, and w is + // (witness ((z T)) (= z t)) + // Notice that the free variables of R are w and the free variables of t. + STRING_REDUCTION, + // ======== Eager Reduction + // Children: none + // Arguments: (t, id?) + // --------------------- + // Conclusion: R + // where R = strings::TermRegistry::eagerReduce(t, id). + STRING_EAGER_REDUCTION, + //======================== Regular expressions + // ======== Regular expression intersection + // Children: (P:(str.in.re t R1), P:(str.in.re t R2)) + // Arguments: none + // --------------------- + // Conclusion: (str.in.re t (re.inter R1 R2)). + RE_INTER, + // ======== Regular expression unfold positive + // Children: (P:(str.in.re t R)) + // Arguments: none + // --------------------- + // Conclusion:(RegExpOpr::reduceRegExpPos((str.in.re t R))), + // corresponding to the one-step unfolding of the premise. + RE_UNFOLD_POS, + // ======== Regular expression unfold negative + // Children: (P:(not (str.in.re t R))) + // Arguments: none + // --------------------- + // Conclusion:(RegExpOpr::reduceRegExpNeg((not (str.in.re t R)))), + // corresponding to the one-step unfolding of the premise. + RE_UNFOLD_NEG, + // ======== Regular expression unfold negative concat fixed + // Children: (P:(not (str.in.re t R))) + // Arguments: none + // --------------------- + // Conclusion:(RegExpOpr::reduceRegExpNegConcatFixed((not (str.in.re t + // R)),L,i)) where RegExpOpr::getRegExpConcatFixed((not (str.in.re t R)), i) = + // L. corresponding to the one-step unfolding of the premise, optimized for + // fixed length of component i of the regular expression concatenation R. + RE_UNFOLD_NEG_CONCAT_FIXED, + // ======== Regular expression elimination + // Children: (P:F) + // Arguments: none + // --------------------- + // Conclusion: R + // where R = strings::RegExpElimination::eliminate(F). + RE_ELIM, + //======================== Code points + // Children: none + // Arguments: (t, s) + // --------------------- + // Conclusion:(or (= (str.code t) (- 1)) + // (not (= (str.code t) (str.code s))) + // (not (= t s))) + STRING_CODE_INJ, // ======== Adding Inequalities // Note: an ArithLiteral is a term of the form (>< poly const) // where diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml index 32c4c64c7..e69fa3317 100644 --- a/src/options/strings_options.toml +++ b/src/options/strings_options.toml @@ -205,3 +205,20 @@ header = "options/strings_options.h" [[option.mode.NONE]] name = "none" help = "Do not compute intersections for regular expressions." + +[[option]] + name = "stringUnifiedVSpt" + category = "regular" + long = "strings-unified-vspt" + type = "bool" + default = "true" + read_only = true + help = "use a single skolem for the variable splitting rule" + +[[option]] + name = "stringLenConc" + category = "regular" + long = "strings-len-conc" + type = "bool" + default = "false" + help = "add skolem length constraints in conclusions of inferences" diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index a38d16279..062bfe12f 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -32,13 +32,15 @@ namespace strings { CoreInferInfo::CoreInferInfo() : d_index(0), d_rev(false) {} -CoreSolver::CoreSolver(context::Context* c, - context::UserContext* u, - SolverState& s, +CoreSolver::CoreSolver(SolverState& s, InferenceManager& im, TermRegistry& tr, BaseSolver& bs) - : d_state(s), d_im(im), d_termReg(tr), d_bsolver(bs), d_nfPairs(c) + : d_state(s), + d_im(im), + d_termReg(tr), + d_bsolver(bs), + d_nfPairs(s.getSatContext()) { d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) ); d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); @@ -171,7 +173,6 @@ void CoreSolver::checkFlatForms() // conflict, explanation is n = base ^ base = c ^ relevant portion // of ( n = f[n] ) std::vector exp; - d_bsolver.explainConstantEqc(n,eqc,exp); for (int e = firstc; e <= lastc; e++) { if (d_flat_form[n][e].isConst()) @@ -180,9 +181,10 @@ void CoreSolver::checkFlatForms() Assert(d_flat_form_index[n][e] >= 0 && d_flat_form_index[n][e] < (int)n.getNumChildren()); d_im.addToExplanation( - d_flat_form[n][e], n[d_flat_form_index[n][e]], exp); + n[d_flat_form_index[n][e]], d_flat_form[n][e], exp); } } + d_bsolver.explainConstantEqc(n, eqc, exp); Node conc = d_false; d_im.sendInference(exp, conc, Inference::F_NCTN); return; @@ -239,6 +241,8 @@ void CoreSolver::checkFlatForm(std::vector& eqc, << "Check flat form for a = " << a << ", whose flat form is " << d_flat_form[a] << ")" << std::endl; Node b; + // the length explanation + Node lant; do { std::vector exp; @@ -370,10 +374,11 @@ void CoreSolver::checkFlatForm(std::vector& eqc, Trace("strings-ff-debug") << lexp2[j] << std::endl; } } - - exp.insert(exp.end(), lexp.begin(), lexp.end()); - exp.insert(exp.end(), lexp2.begin(), lexp2.end()); - d_im.addToExplanation(lcurr, lcc, exp); + std::vector lexpc; + lexpc.insert(lexpc.end(), lexp.begin(), lexp.end()); + lexpc.insert(lexpc.end(), lexp2.begin(), lexp2.end()); + d_im.addToExplanation(lcurr, lcc, lexpc); + lant = utils::mkAnd(lexpc); conc = ac.eqNode(bc); infType = Inference::F_UNIFY; break; @@ -388,7 +393,6 @@ void CoreSolver::checkFlatForm(std::vector& eqc, Trace("strings-ff-debug") << "Found inference (" << infType << "): " << conc << " based on equality " << a << " == " << b << ", " << isRev << std::endl; - d_im.addToExplanation(a, b, exp); // explain why prefixes up to now were the same for (size_t j = 0; j < count; j++) { @@ -425,6 +429,12 @@ void CoreSolver::checkFlatForm(std::vector& eqc, } } } + d_im.addToExplanation(a, b, exp); + if (!lant.isNull()) + { + // add the length explanation + exp.push_back(lant); + } // Notice that F_EndpointEmp is not typically applied, since // strict prefix equality ( a.b = a ) where a,b non-empty // is conflicting by arithmetic len(a.b)=len(a)+len(b)!=len(a) @@ -549,9 +559,12 @@ void CoreSolver::checkNormalFormsEq() { NormalForm& nfe_eq = getNormalForm(itn->second); // two equivalence classes have same normal form, merge - std::vector nf_exp; - nf_exp.push_back(utils::mkAnd(nfe.d_exp)); - nf_exp.push_back(eqc_to_exp[itn->second]); + std::vector nf_exp(nfe.d_exp.begin(), nfe.d_exp.end()); + Node eexp = eqc_to_exp[itn->second]; + if (eexp != d_true) + { + nf_exp.push_back(eexp); + } Node eq = nfe.d_base.eqNode(nfe_eq.d_base); d_im.sendInference(nf_exp, eq, Inference::NORMAL_FORM); if (d_im.hasProcessed()) @@ -693,6 +706,158 @@ Node CoreSolver::getNormalString(Node x, std::vector& nf_exp) return x; } +Node CoreSolver::getConclusion(Node x, + Node y, + PfRule rule, + bool isRev, + SkolemCache* skc, + std::vector& newSkolems) +{ + Trace("strings-csolver") << "CoreSolver::getConclusion: " << x << " " << y + << " " << rule << " " << isRev << std::endl; + NodeManager* nm = NodeManager::currentNM(); + Node conc; + if (rule == PfRule::CONCAT_SPLIT || rule == PfRule::CONCAT_LPROP) + { + Node sk1; + Node sk2; + if (options::stringUnifiedVSpt()) + { + // must compare so that we are agnostic to order of x/y + Node ux = x < y ? x : y; + Node uy = x < y ? y : x; + Node sk = skc->mkSkolemCached(ux, + uy, + isRev ? SkolemCache::SK_ID_V_UNIFIED_SPT_REV + : SkolemCache::SK_ID_V_UNIFIED_SPT, + "v_spt"); + newSkolems.push_back(sk); + sk1 = sk; + sk2 = sk; + } + else + { + sk1 = skc->mkSkolemCached( + x, + y, + isRev ? SkolemCache::SK_ID_V_SPT_REV : SkolemCache::SK_ID_V_SPT, + "v_spt1"); + sk2 = skc->mkSkolemCached( + y, + x, + isRev ? SkolemCache::SK_ID_V_SPT_REV : SkolemCache::SK_ID_V_SPT, + "v_spt2"); + newSkolems.push_back(sk1); + newSkolems.push_back(sk2); + } + Node eq1 = x.eqNode(isRev ? nm->mkNode(STRING_CONCAT, sk1, y) + : nm->mkNode(STRING_CONCAT, y, sk1)); + + if (rule == PfRule::CONCAT_LPROP) + { + conc = eq1; + } + else + { + Node eq2 = y.eqNode(isRev ? nm->mkNode(STRING_CONCAT, sk2, x) + : nm->mkNode(STRING_CONCAT, x, sk2)); + // make agnostic to x/y + conc = x < y ? nm->mkNode(OR, eq1, eq2) : nm->mkNode(OR, eq2, eq1); + } + if (options::stringUnifiedVSpt() && options::stringLenConc()) + { + // we can assume its length is greater than zero + Node emp = Word::mkEmptyWord(sk1.getType()); + conc = nm->mkNode( + AND, + conc, + sk1.eqNode(emp).negate(), + nm->mkNode( + GT, nm->mkNode(STRING_LENGTH, sk1), nm->mkConst(Rational(0)))); + } + } + else if (rule == PfRule::CONCAT_CSPLIT) + { + Assert(y.isConst()); + size_t yLen = Word::getLength(y); + Node firstChar = + yLen == 1 ? y : (isRev ? Word::suffix(y, 1) : Word::prefix(y, 1)); + Node sk = skc->mkSkolemCached( + x, + isRev ? SkolemCache::SK_ID_VC_SPT_REV : SkolemCache::SK_ID_VC_SPT, + "c_spt"); + newSkolems.push_back(sk); + conc = x.eqNode(isRev ? nm->mkNode(STRING_CONCAT, sk, firstChar) + : nm->mkNode(STRING_CONCAT, firstChar, sk)); + } + else if (rule == PfRule::CONCAT_CPROP) + { + // expect (str.++ z d) and c + Assert(x.getKind() == STRING_CONCAT && x.getNumChildren() == 2); + Node z = x[isRev ? 1 : 0]; + Node d = x[isRev ? 0 : 1]; + Assert(d.isConst()); + Node c = y; + Assert(c.isConst()); + size_t cLen = Word::getLength(c); + size_t p = getSufficientNonEmptyOverlap(c, d, isRev); + Node preC = + p == cLen ? c : (isRev ? Word::suffix(c, p) : Word::prefix(c, p)); + Node sk = skc->mkSkolemCached( + z, + preC, + isRev ? SkolemCache::SK_ID_C_SPT_REV : SkolemCache::SK_ID_C_SPT, + "c_spt"); + newSkolems.push_back(sk); + conc = z.eqNode(isRev ? nm->mkNode(STRING_CONCAT, sk, preC) + : nm->mkNode(STRING_CONCAT, preC, sk)); + } + + return conc; +} + +size_t CoreSolver::getSufficientNonEmptyOverlap(Node c, Node d, bool isRev) +{ + Assert(c.isConst() && c.getType().isStringLike()); + Assert(d.isConst() && d.getType().isStringLike()); + size_t p; + size_t p2; + size_t cLen = Word::getLength(c); + if (isRev) + { + // Since non-empty, we start with character 1 + Node c1 = Word::prefix(c, cLen - 1); + p = cLen - Word::roverlap(c1, d); + p2 = Word::rfind(c1, d); + } + else + { + Node c1 = Word::substr(c, 1); + p = cLen - Word::overlap(c1, d); + p2 = Word::find(c1, d); + } + return p2 == std::string::npos ? p : (p > p2 + 1 ? p2 + 1 : p); +} + +Node CoreSolver::getDecomposeConclusion( + Node x, Node l, bool isRev, SkolemCache* skc, std::vector& newSkolems) +{ + Assert(l.getType().isInteger()); + NodeManager* nm = NodeManager::currentNM(); + Node n = isRev ? nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), l) : l; + Node sk1 = skc->mkSkolemCached(x, n, SkolemCache::SK_PREFIX, "dc_spt1"); + newSkolems.push_back(sk1); + Node sk2 = skc->mkSkolemCached(x, n, SkolemCache::SK_SUFFIX_REM, "dc_spt2"); + newSkolems.push_back(sk2); + Node conc = x.eqNode(nm->mkNode(STRING_CONCAT, sk1, sk2)); + if (options::stringLenConc()) + { + Node lc = nm->mkNode(STRING_LENGTH, isRev ? sk2 : sk1).eqNode(l); + conc = nm->mkNode(AND, conc, lc); + } + return conc; +} + void CoreSolver::getNormalForms(Node eqc, std::vector& normal_forms, std::map& term_to_nf_index, @@ -920,16 +1085,16 @@ void CoreSolver::processNEqc(Node eqc, if (!StringsEntail::canConstantContainList(c, nfi.d_nf, firstc, lastc)) { Node n = nfi.d_base; + std::vector exp(nfi.d_exp.begin(), nfi.d_exp.end()); //conflict Trace("strings-solve") << "Normal form for " << n << " cannot be contained in constant " << c << std::endl; // conflict, explanation is: // n = base ^ base = c ^ relevant porition of ( n = N[n] ) - std::vector< Node > exp; - d_bsolver.explainConstantEqc(n,eqc,exp); // Notice although not implemented, this can be minimized based on // firstc/lastc, normal_forms_exp_depend. - exp.insert(exp.end(), nfi.d_exp.begin(), nfi.d_exp.end()); + d_bsolver.explainConstantEqc(n, eqc, exp); d_im.sendInference(exp, d_false, Inference::N_NCTN); + // conflict, finished return; } } @@ -1023,6 +1188,10 @@ void CoreSolver::processNEqc(Node eqc, return; } } + if (d_im.hasProcessed()) + { + break; + } } if (d_im.hasProcessed() || pinfer.empty()) { @@ -1134,6 +1303,14 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, if (d_state.areEqual(xLenTerm, yLenTerm)) { + std::vector ant; + NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, ant); + if (x.isConst() && y.isConst()) + { + // if both are constant, it's just a constant conflict + d_im.sendInference(ant, d_false, Inference::N_CONST, true); + return; + } // `x` and `y` have the same length. We infer that the two components // have to be the same. // @@ -1142,9 +1319,11 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, << "Simple Case 2 : string lengths are equal" << std::endl; Node eq = x.eqNode(y); Node leneq = xLenTerm.eqNode(yLenTerm); - NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, lenExp); lenExp.push_back(leneq); - d_im.sendInference(lenExp, eq, Inference::N_UNIFY); + // set the explanation for length + Node lant = utils::mkAnd(lenExp); + ant.push_back(lant); + d_im.sendInference(ant, eq, Inference::N_UNIFY); break; } else if ((!x.isConst() && index == nfiv.size() - rproc - 1) @@ -1356,9 +1535,9 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, break; } - // At this point, we know that `nc` is non-empty, so we add that to our - // explanation. - iinfo.d_ant.push_back(expNonEmpty); + // At this point, we know that `nc` is non-empty, so we add expNonEmpty + // to our explanation below. We do this after adding other parts of the + // explanation for consistency with other inferences. size_t ncIndex = index + 1; Node nextConstStr = nfnc.collectConstantStringAt(ncIndex); @@ -1370,35 +1549,11 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // E.g. "abc" ++ ... = nc ++ "b" ++ ... ---> nc = "a" ++ k size_t cIndex = index; Node stra = nfc.collectConstantStringAt(cIndex); - size_t straLen = Word::getLength(stra); Assert(!stra.isNull()); Node strb = nextConstStr; - // Since `nc` is non-empty, we start with character 1 - size_t p; - if (isRev) - { - Node stra1 = Word::prefix(stra, straLen - 1); - p = straLen - Word::roverlap(stra1, strb); - Trace("strings-csp-debug") - << "Compute roverlap : " << stra1 << " " << strb << std::endl; - size_t p2 = Word::rfind(stra1, strb); - p = p2 == std::string::npos ? p : (p > p2 + 1 ? p2 + 1 : p); - Trace("strings-csp-debug") - << "roverlap : " << stra1 << " " << strb << " returned " << p - << " " << p2 << " " << (p2 == std::string::npos) << std::endl; - } - else - { - Node stra1 = Word::substr(stra, 1); - p = straLen - Word::overlap(stra1, strb); - Trace("strings-csp-debug") - << "Compute overlap : " << stra1 << " " << strb << std::endl; - size_t p2 = Word::find(stra1, strb); - p = p2 == std::string::npos ? p : (p > p2 + 1 ? p2 + 1 : p); - Trace("strings-csp-debug") - << "overlap : " << stra1 << " " << strb << " returned " << p - << " " << p2 << " " << (p2 == std::string::npos) << std::endl; - } + + // Since `nc` is non-empty, we use the non-empty overlap + size_t p = getSufficientNonEmptyOverlap(stra, strb, isRev); // If we can't split off more than a single character from the // constant, we might as well do regular constant/non-constant @@ -1407,22 +1562,18 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, { NormalForm::getExplanationForPrefixEq( nfc, nfnc, cIndex, ncIndex, iinfo.d_ant); - Node prea = p == straLen ? stra - : (isRev ? Word::suffix(stra, p) - : Word::prefix(stra, p)); + iinfo.d_ant.push_back(expNonEmpty); + // make the conclusion SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk = skc->mkSkolemCached( - nc, - prea, - isRev ? SkolemCache::SK_ID_C_SPT_REV : SkolemCache::SK_ID_C_SPT, - "c_spt"); - Trace("strings-csp") - << "Const Split: " << prea << " is removed from " << stra - << " due to " << strb << ", p=" << p << std::endl; - iinfo.d_conc = nc.eqNode(isRev ? utils::mkNConcat(sk, prea) - : utils::mkNConcat(prea, sk)); - iinfo.d_new_skolem[LENGTH_SPLIT].push_back(sk); + Node xcv = + nm->mkNode(STRING_CONCAT, isRev ? strb : nc, isRev ? nc : strb); + std::vector newSkolems; + iinfo.d_conc = getConclusion( + xcv, stra, PfRule::CONCAT_CPROP, isRev, skc, newSkolems); + Assert(newSkolems.size() == 1); + iinfo.d_new_skolem[LENGTH_SPLIT].push_back(newSkolems[0]); iinfo.d_id = Inference::SSPLIT_CST_PROP; + iinfo.d_idRev = isRev; pinfer.push_back(info); break; } @@ -1432,25 +1583,17 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // to start with the first character of the constant. // // E.g. "abc" ++ ... = nc ++ ... ---> nc = "a" ++ k - Node stra = nfcv[index]; - size_t straLen = Word::getLength(stra); - Node firstChar = straLen == 1 ? stra - : (isRev ? Word::suffix(stra, 1) - : Word::prefix(stra, 1)); SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk = skc->mkSkolemCached( - nc, - isRev ? SkolemCache::SK_ID_VC_SPT_REV : SkolemCache::SK_ID_VC_SPT, - "c_spt"); - Trace("strings-csp") << "Const Split: " << firstChar - << " is removed from " << stra << " (serial) " - << std::endl; + std::vector newSkolems; + iinfo.d_conc = getConclusion( + nc, nfcv[index], PfRule::CONCAT_CSPLIT, isRev, skc, newSkolems); NormalForm::getExplanationForPrefixEq( nfi, nfj, index, index, iinfo.d_ant); - iinfo.d_conc = nc.eqNode(isRev ? utils::mkNConcat(sk, firstChar) - : utils::mkNConcat(firstChar, sk)); - iinfo.d_new_skolem[LENGTH_SPLIT].push_back(sk); + iinfo.d_ant.push_back(expNonEmpty); + Assert(newSkolems.size() == 1); + iinfo.d_new_skolem[LENGTH_SPLIT].push_back(newSkolems[0]); iinfo.d_id = Inference::SSPLIT_CST; + iinfo.d_idRev = isRev; pinfer.push_back(info); break; } @@ -1465,7 +1608,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Assert(!y.isConst()); int32_t lentTestSuccess = -1; - Node lentTestExp; + Node lenConstraint; if (options::stringCheckEntailLen()) { // If length entailment checks are enabled, we can save the case split by @@ -1489,54 +1632,69 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Trace("strings-entail") << " explanation was : " << et.second << std::endl; lentTestSuccess = e; - lentTestExp = et.second; + lenConstraint = entLit; + // its not explained by the equality engine of this class + iinfo.d_antn.push_back(lenConstraint); break; } } } } + std::vector lcVec; + if (lenConstraint.isNull()) + { + // will do split on length + lenConstraint = nm->mkNode(EQUAL, xLenTerm, yLenTerm).negate(); + lcVec.push_back(lenConstraint); + } NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, iinfo.d_ant); // Add premises for x != "" ^ y != "" for (unsigned xory = 0; xory < 2; xory++) { Node t = xory == 0 ? x : y; - Node tnz = d_state.explainNonEmpty(x); + Node tnz = d_state.explainNonEmpty(t); if (!tnz.isNull()) { - iinfo.d_ant.push_back(tnz); + lcVec.push_back(tnz); } else { tnz = x.eqNode(emp).negate(); + // lcVec.push_back(tnz); iinfo.d_antn.push_back(tnz); } } SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk = skc->mkSkolemCached(x, - y, - isRev ? SkolemCache::SK_ID_V_UNIFIED_SPT_REV - : SkolemCache::SK_ID_V_UNIFIED_SPT, - "v_spt"); - iinfo.d_new_skolem[LENGTH_GEQ_ONE].push_back(sk); - Node eq1 = - x.eqNode(isRev ? utils::mkNConcat(sk, y) : utils::mkNConcat(y, sk)); - Node eq2 = - y.eqNode(isRev ? utils::mkNConcat(sk, x) : utils::mkNConcat(x, sk)); - - if (lentTestSuccess != -1) - { - iinfo.d_antn.push_back(lentTestExp); - iinfo.d_conc = lentTestSuccess == 0 ? eq1 : eq2; + std::vector newSkolems; + // make the conclusion + if (lentTestSuccess == -1) + { + iinfo.d_id = Inference::SSPLIT_VAR; + iinfo.d_conc = + getConclusion(x, y, PfRule::CONCAT_SPLIT, isRev, skc, newSkolems); + if (options::stringUnifiedVSpt() && !options::stringLenConc()) + { + Assert(newSkolems.size() == 1); + iinfo.d_new_skolem[LENGTH_GEQ_ONE].push_back(newSkolems[0]); + } + } + else if (lentTestSuccess == 0) + { iinfo.d_id = Inference::SSPLIT_VAR_PROP; + iinfo.d_conc = + getConclusion(x, y, PfRule::CONCAT_LPROP, isRev, skc, newSkolems); } else { - Node ldeq = nm->mkNode(EQUAL, xLenTerm, yLenTerm).negate(); - iinfo.d_ant.push_back(ldeq); - iinfo.d_conc = nm->mkNode(OR, eq1, eq2); - iinfo.d_id = Inference::SSPLIT_VAR; + Assert(lentTestSuccess == 1); + iinfo.d_id = Inference::SSPLIT_VAR_PROP; + iinfo.d_conc = + getConclusion(y, x, PfRule::CONCAT_LPROP, isRev, skc, newSkolems); } + Node lc = utils::mkAnd(lcVec); + iinfo.d_ant.push_back(lc); + iinfo.d_idRev = isRev; pinfer.push_back(info); break; } @@ -1675,10 +1833,6 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi, } } - Node ant = d_im.mkExplain(iinfo.d_ant); - iinfo.d_ant.clear(); - iinfo.d_antn.push_back(ant); - Node str_in_re; if (s_zy == t_yz && r == emp && s_zy.isConst() && s_zy.getConst().isRepeated()) @@ -1922,32 +2076,29 @@ void CoreSolver::processDeq(Node ni, Node nj) { // Either `x` or `y` is a constant and it is not know whether the // non-empty non-constant is of length one. We split the non-constant - // into a string of length one and the remainder and split on whether - // the first character of the constant and the non-constant are - // equal. + // into a string of length one and the remainder. // - // E.g. x ++ x' ++ ... != "abc" ++ y' ++ ... ^ len(x) != "" ---> - // x = k1 ++ k2 ^ len(k1) = 1 ^ (k1 != "a" v x = "a" ++ k2) + // len(x)>=1 => x = k1 ++ k2 ^ len(k1) = 1 SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk = - skc->mkSkolemCached(nck, SkolemCache::SK_ID_DC_SPT, "dc_spt"); - d_termReg.registerTermAtomic(sk, LENGTH_ONE); - Node skr = skc->mkSkolemCached( - nck, SkolemCache::SK_ID_DC_SPT_REM, "dc_spt_rem"); - Node eq1 = nck.eqNode(nm->mkNode(kind::STRING_CONCAT, sk, skr)); - eq1 = Rewriter::rewrite(eq1); - Node eq2 = - nck.eqNode(nm->mkNode(kind::STRING_CONCAT, firstChar, skr)); - std::vector antec(nfni.d_exp.begin(), nfni.d_exp.end()); - antec.insert(antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); - antec.push_back(expNonEmpty); - d_im.sendInference( - antec, - nm->mkNode( - OR, nm->mkNode(AND, eq1, sk.eqNode(firstChar).negate()), eq2), - Inference::DEQ_DISL_FIRST_CHAR_STRING_SPLIT, - true); - d_im.sendPhaseRequirement(eq1, true); + std::vector newSkolems; + Node conc = + getDecomposeConclusion(nck, d_one, false, skc, newSkolems); + Assert(newSkolems.size() == 2); + if (options::stringLenConc()) + { + d_termReg.registerTermAtomic(newSkolems[0], LENGTH_IGNORE); + } + else + { + d_termReg.registerTermAtomic(newSkolems[0], LENGTH_ONE); + } + std::vector antecLen; + antecLen.push_back(nm->mkNode(GEQ, nckLenTerm, d_one)); + d_im.sendInference({}, + antecLen, + conc, + Inference::DEQ_DISL_FIRST_CHAR_STRING_SPLIT, + true); return; } } @@ -1957,47 +2108,35 @@ void CoreSolver::processDeq(Node ni, Node nj) // are both non-constants. We split them into parts that have the same // lengths. // - // E.g. x ++ x' ++ ... != y ++ y' ++ ... ^ len(x) != len(y) ---> - // len(k1) = len(x) ^ len(k2) = len(y) ^ - // (y = k1 ++ k3 v x = k1 ++ k2) - Trace("strings-solve") << "Non-Simple Case 1 : add lemma " << std::endl; - std::vector antec(nfni.d_exp.begin(), nfni.d_exp.end()); - antec.insert(antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); - std::vector antecNewLits; - - if (d_state.areDisequal(ni, nj)) - { - antec.push_back(ni.eqNode(nj).negate()); - } - else + // len(x) >= len(y) => x = k1 ++ k2 ^ len(k1) = len(y) + // len(y) >= len(x) => y = k3 ++ k4 ^ len(k3) = len(x) + Trace("strings-solve") + << "Non-Simple Case 1 : add lemmas " << std::endl; + SkolemCache* skc = d_termReg.getSkolemCache(); + + for (unsigned r = 0; r < 2; r++) { - antecNewLits.push_back(ni.eqNode(nj).negate()); + Node ux = r == 0 ? x : y; + Node uy = r == 0 ? y : x; + Node uxLen = nm->mkNode(STRING_LENGTH, ux); + Node uyLen = nm->mkNode(STRING_LENGTH, uy); + std::vector newSkolems; + Node conc = getDecomposeConclusion(ux, uyLen, false, skc, newSkolems); + Assert(newSkolems.size() == 2); + if (options::stringLenConc()) + { + d_termReg.registerTermAtomic(newSkolems[0], LENGTH_IGNORE); + } + else + { + d_termReg.registerTermAtomic(newSkolems[0], LENGTH_GEQ_ONE); + } + std::vector antecLen; + antecLen.push_back(nm->mkNode(GEQ, uxLen, uyLen)); + d_im.sendInference( + {}, antecLen, conc, Inference::DEQ_DISL_STRINGS_SPLIT, true); } - antecNewLits.push_back(xLenTerm.eqNode(yLenTerm).negate()); - std::vector conc; - SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk1 = - skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_X, "x_dsplit"); - Node sk2 = - skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Y, "y_dsplit"); - Node sk3 = - skc->mkSkolemCached(y, x, SkolemCache::SK_ID_V_SPT, "z_dsplit"); - Node sk4 = - skc->mkSkolemCached(x, y, SkolemCache::SK_ID_V_SPT, "w_dsplit"); - d_termReg.registerTermAtomic(sk3, LENGTH_GEQ_ONE); - Node sk1Len = utils::mkNLength(sk1); - conc.push_back(sk1Len.eqNode(xLenTerm)); - Node sk2Len = utils::mkNLength(sk2); - conc.push_back(sk2Len.eqNode(yLenTerm)); - conc.push_back(nm->mkNode(OR, - y.eqNode(utils::mkNConcat(sk1, sk3)), - x.eqNode(utils::mkNConcat(sk2, sk4)))); - d_im.sendInference(antec, - antecNewLits, - nm->mkNode(AND, conc), - Inference::DEQ_DISL_STRINGS_SPLIT, - true); return; } } @@ -2366,14 +2505,14 @@ void CoreSolver::checkLengthsEqc() { // if not, add the lemma std::vector ant; ant.insert(ant.end(), nfi.d_exp.begin(), nfi.d_exp.end()); - ant.push_back(nfi.d_base.eqNode(lt)); + ant.push_back(lt.eqNode(nfi.d_base)); Node lc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, nf); Node lcr = Rewriter::rewrite(lc); Trace("strings-process-debug") << "Rewrote length " << lc << " to " << lcr << std::endl; if (!d_state.areEqual(llt, lcr)) { - Node eq = llt.eqNode(lcr); + Node eq = llt.eqNode(lc); ei->d_normalizedLength.set(eq); d_im.sendInference(ant, eq, Inference::LEN_NORM, true); } @@ -2385,9 +2524,9 @@ bool CoreSolver::processInferInfo(CoreInferInfo& cii) { InferInfo& ii = cii.d_infer; // rewrite the conclusion, ensure non-trivial - ii.d_conc = Rewriter::rewrite(ii.d_conc); + Node concr = Rewriter::rewrite(ii.d_conc); - if (ii.isTrivial()) + if (concr == d_true) { // conclusion rewrote to true return false; diff --git a/src/theory/strings/core_solver.h b/src/theory/strings/core_solver.h index db1f5ecf6..2ee61e759 100644 --- a/src/theory/strings/core_solver.h +++ b/src/theory/strings/core_solver.h @@ -81,9 +81,7 @@ class CoreSolver typedef context::CDHashMap NodeIntMap; public: - CoreSolver(context::Context* c, - context::UserContext* u, - SolverState& s, + CoreSolver(SolverState& s, InferenceManager& im, TermRegistry& tr, BaseSolver& bs); @@ -219,6 +217,64 @@ class CoreSolver */ Node getNormalString(Node x, std::vector& nf_exp); //-------------------------- end query functions + + /** + * This returns the conclusion of the proof rule corresponding to splitting + * on the arrangement of terms x and y appearing in an equation of the form + * x ++ x' = y ++ y' or x' ++ x = y' ++ y + * where we are in the second case if isRev is true. This method is called + * both by the core solver and by the strings proof checker. + * + * @param x The first term + * @param y The second term + * @param rule The proof rule whose conclusion we are asking for + * @param isRev Whether the equation is in a reverse direction + * @param skc The skolem cache (to allocate fresh variables if necessary) + * @param newSkolems The vector to add new variables to + * @return The conclusion of the inference. + */ + static Node getConclusion(Node x, + Node y, + PfRule rule, + bool isRev, + SkolemCache* skc, + std::vector& newSkolems); + /** + * Get sufficient non-empty overlap of string constants c and d. + * + * This is called when handling equations of the form: + * x ++ d ++ ... = c ++ ... + * when x is non-empty and non-constant. + * + * This returns the maximal index in c which x must have as a prefix, which + * notice is an integer >= 1 since x is non-empty. + * + * @param c The first constant + * @param d The second constant + * @param isRev Whether the equation is in the reverse direction + * @return The position in c. + */ + static size_t getSufficientNonEmptyOverlap(Node c, Node d, bool isRev); + /** + * This returns the conclusion of the decompose proof rule. This returns + * a conjunction of splitting string x into pieces based on length l, e.g.: + * x = k_1 ++ k_2 + * where k_1 (resp. k_2) is a skolem corresponding to a substring of x of + * length l if isRev is false (resp. true). + * + * @param x The string term + * @param l The length term + * @param isRev Whether the equation is in a reverse direction + * @param skc The skolem cache (to allocate fresh variables if necessary) + * @param newSkolems The vector to add new variables to + * @return The conclusion of the inference. + */ + static Node getDecomposeConclusion(Node x, + Node l, + bool isRev, + SkolemCache* skc, + std::vector& newSkolems); + private: /** * This processes the infer info ii as an inference. In more detail, it calls diff --git a/src/theory/strings/infer_info.cpp b/src/theory/strings/infer_info.cpp index c75e03440..174bbe2b7 100644 --- a/src/theory/strings/infer_info.cpp +++ b/src/theory/strings/infer_info.cpp @@ -95,7 +95,7 @@ std::ostream& operator<<(std::ostream& out, Inference i) return out; } -InferInfo::InferInfo() : d_id(Inference::NONE) {} +InferInfo::InferInfo() : d_id(Inference::NONE), d_idRev(false) {} bool InferInfo::isTrivial() const { @@ -119,6 +119,10 @@ bool InferInfo::isFact() const std::ostream& operator<<(std::ostream& out, const InferInfo& ii) { out << "(infer " << ii.d_id << " " << ii.d_conc; + if (ii.d_idRev) + { + out << " :rev"; + } if (!ii.d_ant.empty()) { out << " :ant (" << ii.d_ant << ")"; diff --git a/src/theory/strings/infer_info.h b/src/theory/strings/infer_info.h index 2a42b9fab..7d41dca98 100644 --- a/src/theory/strings/infer_info.h +++ b/src/theory/strings/infer_info.h @@ -347,6 +347,8 @@ class InferInfo ~InferInfo() {} /** The inference identifier */ Inference d_id; + /** Whether it is the reverse form of the above id */ + bool d_idRev; /** The conclusion */ Node d_conc; /** diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 03f9d8d1c..1fffd9638 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -50,7 +50,7 @@ TheoryStrings::TheoryStrings(context::Context* c, d_im(c, u, d_state, d_termReg, d_extTheory, out, d_statistics), d_rewriter(&d_statistics.d_rewrites), d_bsolver(d_state, d_im), - d_csolver(c, u, d_state, d_im, d_termReg, d_bsolver), + d_csolver(d_state, d_im, d_termReg, d_bsolver), d_esolver(c, u, d_state,