Properly guard proof construction for STRINGS_EXTF_EQ_REW (#7519)
[cvc5.git] / src / theory / strings / infer_proof_cons.cpp
1 /******************************************************************************
2 * Top contributors (to current version):
3 * Andrew Reynolds, Gereon Kremer, Aina Niemetz
4 *
5 * This file is part of the cvc5 project.
6 *
7 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8 * in the top-level source directory and their institutional affiliations.
9 * All rights reserved. See the file COPYING in the top-level source
10 * directory for licensing information.
11 * ****************************************************************************
12 *
13 * Implementation of inference to proof conversion.
14 */
15
16 #include "theory/strings/infer_proof_cons.h"
17
18 #include "expr/skolem_manager.h"
19 #include "options/smt_options.h"
20 #include "options/strings_options.h"
21 #include "proof/proof_node_manager.h"
22 #include "theory/builtin/proof_checker.h"
23 #include "theory/rewriter.h"
24 #include "theory/strings/regexp_operation.h"
25 #include "theory/strings/theory_strings_utils.h"
26 #include "util/statistics_registry.h"
27
28 using namespace cvc5::kind;
29
30 namespace cvc5 {
31 namespace theory {
32 namespace strings {
33
34 InferProofCons::InferProofCons(context::Context* c,
35 ProofNodeManager* pnm,
36 SequencesStatistics& statistics)
37 : d_pnm(pnm), d_lazyFactMap(c), d_statistics(statistics)
38 {
39 Assert(d_pnm != nullptr);
40 }
41
42 void InferProofCons::notifyFact(const InferInfo& ii)
43 {
44 Node fact = ii.d_conc;
45 Trace("strings-ipc-debug")
46 << "InferProofCons::notifyFact: " << ii << std::endl;
47 if (d_lazyFactMap.find(fact) != d_lazyFactMap.end())
48 {
49 Trace("strings-ipc-debug") << "...duplicate!" << std::endl;
50 return;
51 }
52 Node symFact = CDProof::getSymmFact(fact);
53 if (!symFact.isNull() && d_lazyFactMap.find(symFact) != d_lazyFactMap.end())
54 {
55 Trace("strings-ipc-debug") << "...duplicate (sym)!" << std::endl;
56 return;
57 }
58 std::shared_ptr<InferInfo> iic = std::make_shared<InferInfo>(ii);
59 d_lazyFactMap.insert(ii.d_conc, iic);
60 }
61
62 void InferProofCons::notifyLemma(const InferInfo& ii)
63 {
64 d_lazyFactMap[ii.d_conc] = std::make_shared<InferInfo>(ii);
65 }
66
67 bool InferProofCons::addProofTo(CDProof* pf,
68 Node conc,
69 InferenceId infer,
70 bool isRev,
71 const std::vector<Node>& exp)
72 {
73 // now go back and convert it to proof steps and add to proof
74 bool useBuffer = false;
75 ProofStep ps;
76 TheoryProofStepBuffer psb(pf->getManager()->getChecker());
77 // run the conversion
78 convert(infer, isRev, conc, exp, ps, psb, useBuffer);
79 // make the proof based on the step or the buffer
80 if (useBuffer)
81 {
82 if (!pf->addSteps(psb))
83 {
84 return false;
85 }
86 }
87 else
88 {
89 if (!pf->addStep(conc, ps))
90 {
91 return false;
92 }
93 }
94 return true;
95 }
96
97 void InferProofCons::packArgs(Node conc,
98 InferenceId infer,
99 bool isRev,
100 const std::vector<Node>& exp,
101 std::vector<Node>& args)
102 {
103 args.push_back(conc);
104 args.push_back(mkInferenceIdNode(infer));
105 args.push_back(NodeManager::currentNM()->mkConst(isRev));
106 // The vector exp is stored as arguments; its flatten form are premises. We
107 // need both since the grouping of exp is important, e.g. { (and a b), c }
108 // is different from { a, b, c } in the convert routine, since positions
109 // of formulas in exp have special meaning.
110 args.insert(args.end(), exp.begin(), exp.end());
111 }
112
113 bool InferProofCons::unpackArgs(const std::vector<Node>& args,
114 Node& conc,
115 InferenceId& infer,
116 bool& isRev,
117 std::vector<Node>& exp)
118 {
119 Assert(args.size() >= 3);
120 conc = args[0];
121 if (!getInferenceId(args[1], infer))
122 {
123 return false;
124 }
125 isRev = args[2].getConst<bool>();
126 exp.insert(exp.end(), args.begin() + 3, args.end());
127 return true;
128 }
129
130 void InferProofCons::convert(InferenceId infer,
131 bool isRev,
132 Node conc,
133 const std::vector<Node>& exp,
134 ProofStep& ps,
135 TheoryProofStepBuffer& psb,
136 bool& useBuffer)
137 {
138 // by default, don't use the buffer
139 useBuffer = false;
140 // Must flatten children with respect to AND to be ready to explain.
141 // We store the index where each flattened vector begins, since some
142 // explanations are grouped together using AND.
143 std::vector<size_t> startExpIndex;
144 for (const Node& ec : exp)
145 {
146 // store the index in the flattened vector
147 startExpIndex.push_back(ps.d_children.size());
148 utils::flattenOp(AND, ec, ps.d_children);
149 }
150 // debug print
151 if (Trace.isOn("strings-ipc-debug"))
152 {
153 Trace("strings-ipc-debug") << "InferProofCons::convert: " << infer
154 << (isRev ? " :rev " : " ") << conc << std::endl;
155 for (const Node& ec : ps.d_children)
156 {
157 Trace("strings-ipc-debug") << " e: " << ec << std::endl;
158 }
159 }
160 // try to find a set of proof steps to incorporate into the buffer
161 psb.clear();
162 NodeManager* nm = NodeManager::currentNM();
163 Node nodeIsRev = nm->mkConst(isRev);
164 switch (infer)
165 {
166 // ========================== equal by substitution+rewriting
167 case InferenceId::STRINGS_I_NORM_S:
168 case InferenceId::STRINGS_I_CONST_MERGE:
169 case InferenceId::STRINGS_I_NORM:
170 case InferenceId::STRINGS_LEN_NORM:
171 case InferenceId::STRINGS_NORMAL_FORM:
172 case InferenceId::STRINGS_CODE_PROXY:
173 {
174 std::vector<Node> pcs = ps.d_children;
175 Node pconc = conc;
176 // purify core substitution proves conc from pconc if necessary,
177 // we apply MACRO_SR_PRED_INTRO to prove pconc
178 if (purifyCoreSubstitution(pconc, pcs, psb, false))
179 {
180 if (psb.applyPredIntro(pconc, pcs))
181 {
182 useBuffer = true;
183 }
184 }
185 }
186 break;
187 // ========================== substitution + rewriting
188 case InferenceId::STRINGS_RE_NF_CONFLICT:
189 case InferenceId::STRINGS_EXTF:
190 case InferenceId::STRINGS_EXTF_N:
191 case InferenceId::STRINGS_EXTF_D:
192 case InferenceId::STRINGS_EXTF_D_N:
193 case InferenceId::STRINGS_I_CONST_CONFLICT:
194 case InferenceId::STRINGS_UNIT_CONST_CONFLICT:
195 {
196 if (!ps.d_children.empty())
197 {
198 std::vector<Node> exps(ps.d_children.begin(), ps.d_children.end() - 1);
199 Node src = ps.d_children[ps.d_children.size() - 1];
200 if (psb.applyPredTransform(src, conc, exps))
201 {
202 useBuffer = true;
203 }
204 }
205 else
206 {
207 // use the predicate version?
208 ps.d_args.push_back(conc);
209 ps.d_rule = PfRule::MACRO_SR_PRED_INTRO;
210 }
211 }
212 break;
213 // ========================== rewrite pred
214 case InferenceId::STRINGS_EXTF_EQ_REW:
215 {
216 // the last child is the predicate we are operating on, move to front
217 Node src = ps.d_children[ps.d_children.size() - 1];
218 std::vector<Node> expe(ps.d_children.begin(), ps.d_children.end() - 1);
219 // start with a default rewrite
220 Trace("strings-ipc-core")
221 << "Generate proof for STRINGS_EXTF_EQ_REW, starting with " << src
222 << std::endl;
223 Node mainEqSRew = psb.applyPredElim(src, expe);
224 Trace("strings-ipc-core")
225 << "...after pred elim: " << mainEqSRew << std::endl;
226 if (mainEqSRew == conc)
227 {
228 Trace("strings-ipc-core") << "...success" << std::endl;
229 useBuffer = true;
230 break;
231 }
232 else if (mainEqSRew.getKind() != EQUAL)
233 {
234 // Note this can happen in rare cases where substitution+rewriting
235 // is more powerful than congruence+rewriting. We fail to reconstruct
236 // the proof in this case.
237 Trace("strings-ipc-core")
238 << "...failed, not equality after rewriting" << std::endl;
239 break;
240 }
241 // may need the "extended equality rewrite"
242 Node mainEqSRew2 = psb.applyPredElim(mainEqSRew,
243 {},
244 MethodId::SB_DEFAULT,
245 MethodId::SBA_SEQUENTIAL,
246 MethodId::RW_REWRITE_EQ_EXT);
247 Trace("strings-ipc-core")
248 << "...after extended equality rewrite: " << mainEqSRew2 << std::endl;
249 if (mainEqSRew2 == conc)
250 {
251 useBuffer = true;
252 break;
253 }
254 // rewrite again with default rewriter
255 Node mainEqSRew3 = psb.applyPredElim(mainEqSRew2, {});
256 useBuffer = (mainEqSRew3 == conc);
257 }
258 break;
259 // ========================== substitution+rewriting, CONCAT_EQ, ...
260 case InferenceId::STRINGS_F_CONST:
261 case InferenceId::STRINGS_F_UNIFY:
262 case InferenceId::STRINGS_F_ENDPOINT_EMP:
263 case InferenceId::STRINGS_F_ENDPOINT_EQ:
264 case InferenceId::STRINGS_F_NCTN:
265 case InferenceId::STRINGS_N_EQ_CONF:
266 case InferenceId::STRINGS_N_CONST:
267 case InferenceId::STRINGS_N_UNIFY:
268 case InferenceId::STRINGS_N_ENDPOINT_EMP:
269 case InferenceId::STRINGS_N_ENDPOINT_EQ:
270 case InferenceId::STRINGS_N_NCTN:
271 case InferenceId::STRINGS_SSPLIT_CST_PROP:
272 case InferenceId::STRINGS_SSPLIT_VAR_PROP:
273 case InferenceId::STRINGS_SSPLIT_CST:
274 case InferenceId::STRINGS_SSPLIT_VAR:
275 {
276 Trace("strings-ipc-core") << "Generate core rule for " << infer
277 << " (rev=" << isRev << ")" << std::endl;
278 // All of the above inferences have the form:
279 // (explanation for why t and s have the same prefix/suffix) ^
280 // t = s ^
281 // (length constraint)?
282 // We call t=s the "main equality" below. The length constraint is
283 // optional, which we split on below.
284 size_t nchild = ps.d_children.size();
285 size_t mainEqIndex = 0;
286 bool mainEqIndexSet = false;
287 // the length constraint
288 std::vector<Node> lenConstraint;
289 // these inferences have a length constraint as the last explain
290 if (infer == InferenceId::STRINGS_N_UNIFY || infer == InferenceId::STRINGS_F_UNIFY
291 || infer == InferenceId::STRINGS_SSPLIT_CST || infer == InferenceId::STRINGS_SSPLIT_VAR
292 || infer == InferenceId::STRINGS_SSPLIT_VAR_PROP
293 || infer == InferenceId::STRINGS_SSPLIT_CST_PROP)
294 {
295 if (exp.size() >= 2)
296 {
297 Assert(exp.size() <= startExpIndex.size());
298 // The index of the "main" equality is the last equality before
299 // the length explanation.
300 mainEqIndex = startExpIndex[exp.size() - 1] - 1;
301 mainEqIndexSet = true;
302 // the remainder is the length constraint
303 lenConstraint.insert(lenConstraint.end(),
304 ps.d_children.begin() + mainEqIndex + 1,
305 ps.d_children.end());
306 }
307 }
308 else if (nchild >= 1)
309 {
310 // The index of the main equality is the last child.
311 mainEqIndex = nchild - 1;
312 mainEqIndexSet = true;
313 }
314 Node mainEq;
315 if (mainEqIndexSet)
316 {
317 mainEq = ps.d_children[mainEqIndex];
318 Trace("strings-ipc-core") << "Main equality " << mainEq << " at index "
319 << mainEqIndex << std::endl;
320 }
321 if (mainEq.isNull() || mainEq.getKind() != EQUAL)
322 {
323 Trace("strings-ipc-core")
324 << "...failed to find main equality" << std::endl;
325 break;
326 }
327 // apply MACRO_SR_PRED_ELIM using equalities up to the main eq
328 // we purify the core substitution
329 std::vector<Node> pcsr(ps.d_children.begin(),
330 ps.d_children.begin() + mainEqIndex);
331 Node pmainEq = mainEq;
332 // we transform mainEq to pmainEq and then use this as the first
333 // argument to MACRO_SR_PRED_ELIM.
334 if (!purifyCoreSubstitution(pmainEq, pcsr, psb, true))
335 {
336 break;
337 }
338 std::vector<Node> childrenSRew;
339 childrenSRew.push_back(pmainEq);
340 childrenSRew.insert(childrenSRew.end(), pcsr.begin(), pcsr.end());
341 // now, conclude the proper equality
342 Node mainEqSRew =
343 psb.tryStep(PfRule::MACRO_SR_PRED_ELIM, childrenSRew, {});
344 if (CDProof::isSame(mainEqSRew, pmainEq))
345 {
346 Trace("strings-ipc-core") << "...undo step" << std::endl;
347 // the rule added above was not necessary
348 psb.popStep();
349 }
350 else if (mainEqSRew == conc)
351 {
352 Trace("strings-ipc-core") << "...success after rewrite!" << std::endl;
353 useBuffer = true;
354 break;
355 }
356 Trace("strings-ipc-core")
357 << "Main equality after subs+rewrite " << mainEqSRew << std::endl;
358 // now, apply CONCAT_EQ to get the remainder
359 std::vector<Node> childrenCeq;
360 childrenCeq.push_back(mainEqSRew);
361 std::vector<Node> argsCeq;
362 argsCeq.push_back(nodeIsRev);
363 Node mainEqCeq = psb.tryStep(PfRule::CONCAT_EQ, childrenCeq, argsCeq);
364 Trace("strings-ipc-core")
365 << "Main equality after CONCAT_EQ " << mainEqCeq << std::endl;
366 if (mainEqCeq.isNull() || mainEqCeq.getKind() != EQUAL)
367 {
368 // fail
369 break;
370 }
371 else if (mainEqCeq == mainEqSRew)
372 {
373 Trace("strings-ipc-core") << "...undo step" << std::endl;
374 // not necessary, probably first component of equality
375 psb.popStep();
376 }
377 // Now, mainEqCeq is an equality t ++ ... == s ++ ... where the
378 // inference involved t and s.
379 if (infer == InferenceId::STRINGS_N_ENDPOINT_EQ
380 || infer == InferenceId::STRINGS_N_ENDPOINT_EMP
381 || infer == InferenceId::STRINGS_F_ENDPOINT_EQ
382 || infer == InferenceId::STRINGS_F_ENDPOINT_EMP)
383 {
384 // Should be equal to conclusion already, or rewrite to it.
385 // Notice that this step is necessary to handle the "rproc"
386 // optimization in processSimpleNEq. Alternatively, this could
387 // possibly be done by CONCAT_EQ with !isRev.
388 std::vector<Node> cexp;
389 if (psb.applyPredTransform(mainEqCeq,
390 conc,
391 cexp,
392 MethodId::SB_DEFAULT,
393 MethodId::SBA_SEQUENTIAL,
394 MethodId::RW_REWRITE_EQ_EXT))
395 {
396 Trace("strings-ipc-core") << "Transformed to " << conc
397 << " via pred transform" << std::endl;
398 // success
399 useBuffer = true;
400 Trace("strings-ipc-core") << "...success!" << std::endl;
401 }
402 // Otherwise, note that EMP rules conclude ti = "" where
403 // t1 ++ ... ++ tn == "". However, these are very rarely applied, let
404 // alone for 2+ children. This case is intentionally unhandled here.
405 }
406 else if (infer == InferenceId::STRINGS_N_CONST || infer == InferenceId::STRINGS_F_CONST
407 || infer == InferenceId::STRINGS_N_EQ_CONF)
408 {
409 // should be a constant conflict
410 std::vector<Node> childrenC;
411 childrenC.push_back(mainEqCeq);
412 std::vector<Node> argsC;
413 argsC.push_back(nodeIsRev);
414 Node mainEqC = psb.tryStep(PfRule::CONCAT_CONFLICT, childrenC, argsC);
415 if (mainEqC == conc)
416 {
417 useBuffer = true;
418 Trace("strings-ipc-core") << "...success!" << std::endl;
419 }
420 }
421 else if (infer == InferenceId::STRINGS_F_NCTN
422 || infer == InferenceId::STRINGS_N_NCTN)
423 {
424 // May require extended equality rewrite, applied after the rewrite
425 // above. Notice we need both in sequence since ext equality rewriting
426 // is not recursive.
427 std::vector<Node> argsERew;
428 addMethodIds(argsERew,
429 MethodId::SB_DEFAULT,
430 MethodId::SBA_SEQUENTIAL,
431 MethodId::RW_REWRITE_EQ_EXT);
432 Node mainEqERew =
433 psb.tryStep(PfRule::MACRO_SR_PRED_ELIM, {mainEqCeq}, argsERew);
434 if (mainEqERew == conc)
435 {
436 useBuffer = true;
437 Trace("strings-ipc-core") << "...success!" << std::endl;
438 }
439 }
440 else
441 {
442 std::vector<Node> tvec;
443 std::vector<Node> svec;
444 utils::getConcat(mainEqCeq[0], tvec);
445 utils::getConcat(mainEqCeq[1], svec);
446 Node t0 = tvec[isRev ? tvec.size() - 1 : 0];
447 Node s0 = svec[isRev ? svec.size() - 1 : 0];
448 bool applySym = false;
449 // may need to apply symmetry
450 if ((infer == InferenceId::STRINGS_SSPLIT_CST
451 || infer == InferenceId::STRINGS_SSPLIT_CST_PROP)
452 && t0.isConst())
453 {
454 Assert(!s0.isConst());
455 applySym = true;
456 std::swap(t0, s0);
457 }
458 if (infer == InferenceId::STRINGS_N_UNIFY || infer == InferenceId::STRINGS_F_UNIFY)
459 {
460 if (conc.getKind() != EQUAL)
461 {
462 break;
463 }
464 // one side should match, the other side could be a split constant
465 if (conc[0] != t0 && conc[1] != s0)
466 {
467 applySym = true;
468 std::swap(t0, s0);
469 }
470 Assert(conc[0].isConst() == t0.isConst());
471 Assert(conc[1].isConst() == s0.isConst());
472 }
473 PfRule rule = PfRule::UNKNOWN;
474 // the form of the required length constraint expected by the proof
475 Node lenReq;
476 bool lenSuccess = false;
477 if (infer == InferenceId::STRINGS_N_UNIFY || infer == InferenceId::STRINGS_F_UNIFY)
478 {
479 // the required premise for unify is always len(x) = len(y),
480 // however the explanation may not be literally this. Thus, we
481 // need to reconstruct a proof from the given explanation.
482 // it should be the case that lenConstraint => lenReq.
483 // We use terms in the conclusion equality, not t0, s0 here.
484 lenReq = nm->mkNode(STRING_LENGTH, conc[0])
485 .eqNode(nm->mkNode(STRING_LENGTH, conc[1]));
486 lenSuccess = convertLengthPf(lenReq, lenConstraint, psb);
487 rule = PfRule::CONCAT_UNIFY;
488 }
489 else if (infer == InferenceId::STRINGS_SSPLIT_VAR)
490 {
491 // it should be the case that lenConstraint => lenReq
492 lenReq = nm->mkNode(STRING_LENGTH, t0)
493 .eqNode(nm->mkNode(STRING_LENGTH, s0))
494 .notNode();
495 lenSuccess = convertLengthPf(lenReq, lenConstraint, psb);
496 rule = PfRule::CONCAT_SPLIT;
497 }
498 else if (infer == InferenceId::STRINGS_SSPLIT_CST)
499 {
500 // it should be the case that lenConstraint => lenReq
501 lenReq = nm->mkNode(STRING_LENGTH, t0)
502 .eqNode(nm->mkConst(Rational(0)))
503 .notNode();
504 lenSuccess = convertLengthPf(lenReq, lenConstraint, psb);
505 rule = PfRule::CONCAT_CSPLIT;
506 }
507 else if (infer == InferenceId::STRINGS_SSPLIT_VAR_PROP)
508 {
509 // it should be the case that lenConstraint => lenReq
510 for (unsigned r = 0; r < 2; r++)
511 {
512 lenReq = nm->mkNode(GT,
513 nm->mkNode(STRING_LENGTH, t0),
514 nm->mkNode(STRING_LENGTH, s0));
515 if (convertLengthPf(lenReq, lenConstraint, psb))
516 {
517 lenSuccess = true;
518 break;
519 }
520 if (r == 0)
521 {
522 // may be the other direction
523 applySym = true;
524 std::swap(t0, s0);
525 }
526 }
527 rule = PfRule::CONCAT_LPROP;
528 }
529 else if (infer == InferenceId::STRINGS_SSPLIT_CST_PROP)
530 {
531 // it should be the case that lenConstraint => lenReq
532 lenReq = nm->mkNode(STRING_LENGTH, t0)
533 .eqNode(nm->mkConst(Rational(0)))
534 .notNode();
535 lenSuccess = convertLengthPf(lenReq, lenConstraint, psb);
536 rule = PfRule::CONCAT_CPROP;
537 }
538 if (!lenSuccess)
539 {
540 Trace("strings-ipc-core")
541 << "...failed due to length constraint" << std::endl;
542 break;
543 }
544 // apply symmetry if necessary
545 if (applySym)
546 {
547 std::vector<Node> childrenSymm;
548 childrenSymm.push_back(mainEqCeq);
549 // note this explicit step may not be necessary
550 mainEqCeq = psb.tryStep(PfRule::SYMM, childrenSymm, {});
551 Trace("strings-ipc-core")
552 << "Main equality after SYMM " << mainEqCeq << std::endl;
553 }
554 if (rule != PfRule::UNKNOWN)
555 {
556 Trace("strings-ipc-core")
557 << "Core rule length requirement is " << lenReq << std::endl;
558 // apply the given rule
559 std::vector<Node> childrenMain;
560 childrenMain.push_back(mainEqCeq);
561 childrenMain.push_back(lenReq);
562 std::vector<Node> argsMain;
563 argsMain.push_back(nodeIsRev);
564 Node mainEqMain = psb.tryStep(rule, childrenMain, argsMain);
565 Trace("strings-ipc-core") << "Main equality after " << rule << " "
566 << mainEqMain << std::endl;
567 if (mainEqMain == mainEqCeq)
568 {
569 Trace("strings-ipc-core") << "...undo step" << std::endl;
570 // not necessary, probably first component of equality
571 psb.popStep();
572 }
573 // either equal or rewrites to it
574 std::vector<Node> cexp;
575 if (psb.applyPredTransform(mainEqMain, conc, cexp))
576 {
577 // requires that length success is also true
578 useBuffer = true;
579 Trace("strings-ipc-core") << "...success" << std::endl;
580 }
581 else
582 {
583 Trace("strings-ipc-core") << "...fail" << std::endl;
584 }
585 }
586 else
587 {
588 // should always have given a rule to try above
589 Assert(false) << "No reconstruction rule given for " << infer;
590 }
591 }
592 }
593 break;
594 // ========================== Disequalities
595 case InferenceId::STRINGS_DEQ_DISL_FIRST_CHAR_STRING_SPLIT:
596 case InferenceId::STRINGS_DEQ_DISL_STRINGS_SPLIT:
597 {
598 if (conc.getKind() != AND || conc.getNumChildren() != 2
599 || conc[0].getKind() != EQUAL || !conc[0][0].getType().isStringLike()
600 || conc[1].getKind() != EQUAL
601 || conc[1][0].getKind() != STRING_LENGTH)
602 {
603 Trace("strings-ipc-deq") << "malformed application" << std::endl;
604 Assert(false) << "unexpected conclusion " << conc << " for " << infer;
605 }
606 else
607 {
608 Node lenReq =
609 nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, conc[0][0]), conc[1][1]);
610 Trace("strings-ipc-deq")
611 << "length requirement is " << lenReq << std::endl;
612 if (convertLengthPf(lenReq, ps.d_children, psb))
613 {
614 Trace("strings-ipc-deq") << "...success length" << std::endl;
615 // make the proof
616 std::vector<Node> childrenMain;
617 childrenMain.push_back(lenReq);
618 std::vector<Node> argsMain;
619 argsMain.push_back(nodeIsRev);
620 Node mainConc =
621 psb.tryStep(PfRule::STRING_DECOMPOSE, childrenMain, argsMain);
622 Trace("strings-ipc-deq")
623 << "...main conclusion is " << mainConc << std::endl;
624 useBuffer = (mainConc == conc);
625 Trace("strings-ipc-deq")
626 << "...success is " << useBuffer << std::endl;
627 }
628 else
629 {
630 Trace("strings-ipc-deq") << "...fail length" << std::endl;
631 }
632 }
633 }
634 break;
635 // ========================== Boolean split
636 case InferenceId::STRINGS_CARD_SP:
637 case InferenceId::STRINGS_LEN_SPLIT:
638 case InferenceId::STRINGS_LEN_SPLIT_EMP:
639 case InferenceId::STRINGS_DEQ_DISL_EMP_SPLIT:
640 case InferenceId::STRINGS_DEQ_DISL_FIRST_CHAR_EQ_SPLIT:
641 case InferenceId::STRINGS_DEQ_STRINGS_EQ:
642 case InferenceId::STRINGS_DEQ_LENS_EQ:
643 case InferenceId::STRINGS_DEQ_LENGTH_SP:
644 {
645 if (conc.getKind() != OR)
646 {
647 // This should never happen. If it does, we resort to using
648 // THEORY_INFERENCE below (in production mode).
649 Assert(false) << "Expected OR conclusion for " << infer;
650 }
651 else
652 {
653 ps.d_rule = PfRule::SPLIT;
654 Assert(ps.d_children.empty());
655 ps.d_args.push_back(conc[0]);
656 }
657 }
658 break;
659 // ========================== Regular expression unfolding
660 case InferenceId::STRINGS_RE_UNFOLD_POS:
661 case InferenceId::STRINGS_RE_UNFOLD_NEG:
662 {
663 Assert (!ps.d_children.empty());
664 size_t nchild = ps.d_children.size();
665 Node mem = ps.d_children[nchild-1];
666 if (nchild>1)
667 {
668 // if more than one, apply MACRO_SR_PRED_ELIM
669 std::vector<Node> tcs;
670 tcs.insert(tcs.end(),
671 ps.d_children.begin(),
672 ps.d_children.begin() + (nchild-1));
673 mem = psb.applyPredElim(mem, tcs);
674 useBuffer = true;
675 }
676 PfRule r = PfRule::UNKNOWN;
677 if (mem.isNull())
678 {
679 // failed to eliminate above
680 Assert(false) << "Failed to apply MACRO_SR_PRED_ELIM for RE unfold";
681 useBuffer = false;
682 }
683 else if (infer == InferenceId::STRINGS_RE_UNFOLD_POS)
684 {
685 r = PfRule::RE_UNFOLD_POS;
686 }
687 else
688 {
689 r = PfRule::RE_UNFOLD_NEG;
690 // it may be an optimized form of concat simplification
691 Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
692 if (mem[0][1].getKind() == REGEXP_CONCAT)
693 {
694 size_t index;
695 Node reLen = RegExpOpr::getRegExpConcatFixed(mem[0][1], index);
696 // if we can find a fixed length for a component, use the optimized
697 // version
698 if (!reLen.isNull())
699 {
700 r = PfRule::RE_UNFOLD_NEG_CONCAT_FIXED;
701 }
702 }
703 }
704 if (useBuffer)
705 {
706 mem = psb.tryStep(r, {mem}, {});
707 // should match the conclusion
708 useBuffer = (mem==conc);
709 }
710 else
711 {
712 ps.d_rule = r;
713 }
714 }
715 break;
716 // ========================== Reduction
717 case InferenceId::STRINGS_CTN_POS:
718 case InferenceId::STRINGS_CTN_NEG_EQUAL:
719 {
720 if (ps.d_children.size() != 1)
721 {
722 break;
723 }
724 bool polarity = ps.d_children[0].getKind() != NOT;
725 Node atom = polarity ? ps.d_children[0] : ps.d_children[0][0];
726 std::vector<Node> args;
727 args.push_back(atom);
728 Node res = psb.tryStep(PfRule::STRING_EAGER_REDUCTION, {}, args);
729 if (res.isNull())
730 {
731 break;
732 }
733 // ite( contains(x,t), x = k1 ++ t ++ k2, x != t )
734 std::vector<Node> tiChildren;
735 tiChildren.push_back(ps.d_children[0]);
736 Node ctnt = psb.tryStep(
737 polarity ? PfRule::TRUE_INTRO : PfRule::FALSE_INTRO, tiChildren, {});
738 if (ctnt.isNull() || ctnt.getKind() != EQUAL)
739 {
740 break;
741 }
742 std::vector<Node> tchildren;
743 tchildren.push_back(ctnt);
744 // apply substitution { contains(x,t) -> true|false } and rewrite to get
745 // conclusion x = k1 ++ t ++ k2 or x != t.
746 if (psb.applyPredTransform(res, conc, tchildren))
747 {
748 useBuffer = true;
749 }
750 }
751 break;
752
753 case InferenceId::STRINGS_REDUCTION:
754 {
755 size_t nchild = conc.getNumChildren();
756 Node mainEq;
757 if (conc.getKind() == EQUAL)
758 {
759 mainEq = conc;
760 }
761 else if (conc.getKind() == AND && conc[nchild - 1].getKind() == EQUAL)
762 {
763 mainEq = conc[nchild - 1];
764 }
765 if (mainEq.isNull())
766 {
767 Trace("strings-ipc-red") << "Bad Reduction: " << conc << std::endl;
768 Assert(false) << "Unexpected reduction " << conc;
769 break;
770 }
771 std::vector<Node> argsRed;
772 // the left hand side of the last conjunct is the term we are reducing
773 argsRed.push_back(mainEq[0]);
774 Node red = psb.tryStep(PfRule::STRING_REDUCTION, {}, argsRed);
775 Trace("strings-ipc-red") << "Reduction : " << red << std::endl;
776 if (!red.isNull())
777 {
778 // either equal or rewrites to it
779 std::vector<Node> cexp;
780 if (psb.applyPredTransform(red, conc, cexp))
781 {
782 Trace("strings-ipc-red") << "...success!" << std::endl;
783 useBuffer = true;
784 }
785 else
786 {
787 Trace("strings-ipc-red") << "...failed to reduce" << std::endl;
788 }
789 }
790 }
791 break;
792 // ========================== code injectivity
793 case InferenceId::STRINGS_CODE_INJ:
794 {
795 ps.d_rule = PfRule::STRING_CODE_INJ;
796 Assert(conc.getKind() == OR && conc.getNumChildren() == 3
797 && conc[2].getKind() == EQUAL);
798 ps.d_args.push_back(conc[2][0]);
799 ps.d_args.push_back(conc[2][1]);
800 }
801 break;
802 // ========================== unit injectivity
803 case InferenceId::STRINGS_UNIT_INJ:
804 {
805 ps.d_rule = PfRule::STRING_SEQ_UNIT_INJ;
806 }
807 break;
808 // ========================== prefix conflict
809 case InferenceId::STRINGS_PREFIX_CONFLICT:
810 {
811 Trace("strings-ipc-prefix") << "Prefix conflict..." << std::endl;
812 std::vector<Node> eqs;
813 for (const Node& e : ps.d_children)
814 {
815 Kind ek = e.getKind();
816 if (ek == EQUAL)
817 {
818 Trace("strings-ipc-prefix") << "- equality : " << e << std::endl;
819 eqs.push_back(e);
820 }
821 else if (ek == STRING_IN_REGEXP)
822 {
823 // unfold it and extract the equality
824 std::vector<Node> children;
825 children.push_back(e);
826 std::vector<Node> args;
827 Node eunf = psb.tryStep(PfRule::RE_UNFOLD_POS, children, args);
828 Trace("strings-ipc-prefix")
829 << "--- " << e << " unfolds to " << eunf << std::endl;
830 if (eunf.isNull())
831 {
832 continue;
833 }
834 else if (eunf.getKind() == AND)
835 {
836 // equality is the first conjunct
837 std::vector<Node> childrenAE;
838 childrenAE.push_back(eunf);
839 std::vector<Node> argsAE;
840 argsAE.push_back(nm->mkConst(Rational(0)));
841 Node eunfAE = psb.tryStep(PfRule::AND_ELIM, childrenAE, argsAE);
842 Trace("strings-ipc-prefix")
843 << "--- and elim to " << eunfAE << std::endl;
844 if (eunfAE.isNull() || eunfAE.getKind() != EQUAL)
845 {
846 Assert(false)
847 << "Unexpected unfolded premise " << eunf << " for " << infer;
848 continue;
849 }
850 Trace("strings-ipc-prefix")
851 << "- equality : " << eunfAE << std::endl;
852 eqs.push_back(eunfAE);
853 }
854 else if (eunf.getKind() == EQUAL)
855 {
856 Trace("strings-ipc-prefix") << "- equality : " << eunf << std::endl;
857 eqs.push_back(eunf);
858 }
859 }
860 else
861 {
862 // not sure how to use this assumption
863 Assert(false) << "Unexpected premise " << e << " for " << infer;
864 }
865 }
866 if (eqs.empty())
867 {
868 break;
869 }
870 // connect via transitivity
871 Node curr = eqs[0];
872 for (size_t i = 1, esize = eqs.size(); i < esize; i++)
873 {
874 Node prev = curr;
875 curr = convertTrans(curr, eqs[1], psb);
876 if (curr.isNull())
877 {
878 break;
879 }
880 Trace("strings-ipc-prefix") << "- Via trans: " << curr << std::endl;
881 }
882 if (curr.isNull())
883 {
884 break;
885 }
886 Trace("strings-ipc-prefix")
887 << "- Possible conflicting equality : " << curr << std::endl;
888 std::vector<Node> emp;
889 Node concE = psb.applyPredElim(curr,
890 emp,
891 MethodId::SB_DEFAULT,
892 MethodId::SBA_SEQUENTIAL,
893 MethodId::RW_REWRITE_EQ_EXT);
894 Trace("strings-ipc-prefix")
895 << "- After pred elim: " << concE << std::endl;
896 if (concE == conc)
897 {
898 Trace("strings-ipc-prefix") << "...success!" << std::endl;
899 useBuffer = true;
900 }
901 }
902 break;
903 // ========================== regular expressions
904 case InferenceId::STRINGS_RE_INTER_INCLUDE:
905 case InferenceId::STRINGS_RE_INTER_CONF:
906 case InferenceId::STRINGS_RE_INTER_INFER:
907 {
908 std::vector<Node> reiExp;
909 std::vector<Node> reis;
910 std::vector<Node> reiChildren;
911 std::vector<Node> reiChildrenOrig;
912 Node x;
913 // make the regular expression intersection that summarizes all
914 // memberships in the explanation
915 for (const Node& c : ps.d_children)
916 {
917 bool polarity = c.getKind() != NOT;
918 Node catom = polarity ? c : c[0];
919 if (catom.getKind() != STRING_IN_REGEXP)
920 {
921 Assert(c.getKind() == EQUAL);
922 if (c.getKind() == EQUAL)
923 {
924 reiExp.push_back(c);
925 }
926 continue;
927 }
928 if (x.isNull())
929 {
930 // just take the first LHS; others should be equated to it by exp
931 x = catom[0];
932 }
933 Node rcurr =
934 polarity ? catom[1] : nm->mkNode(REGEXP_COMPLEMENT, catom[1]);
935 reis.push_back(rcurr);
936 Node mem = nm->mkNode(STRING_IN_REGEXP, catom[0], rcurr);
937 reiChildren.push_back(mem);
938 reiChildrenOrig.push_back(c);
939 }
940 // go back and justify each premise
941 bool successChildren = true;
942 for (size_t i = 0, nchild = reiChildren.size(); i < nchild; i++)
943 {
944 if (!psb.applyPredTransform(reiChildrenOrig[i], reiChildren[i], reiExp))
945 {
946 Trace("strings-ipc-re")
947 << "... failed to justify child " << reiChildren[i] << " from "
948 << reiChildrenOrig[i] << std::endl;
949 successChildren = false;
950 break;
951 }
952 }
953 if (!successChildren)
954 {
955 break;
956 }
957 Node mem = psb.tryStep(PfRule::RE_INTER, reiChildren, {});
958 Trace("strings-ipc-re")
959 << "Regular expression summary: " << mem << std::endl;
960 // the conclusion is rewritable to the premises via rewriting?
961 if (psb.applyPredTransform(mem, conc, {}))
962 {
963 Trace("strings-ipc-re") << "... success!" << std::endl;
964 useBuffer = true;
965 }
966 else
967 {
968 Trace("strings-ipc-re")
969 << "...failed to rewrite to conclusion" << std::endl;
970 }
971 }
972 break;
973 // ========================== unknown and currently unsupported
974 case InferenceId::STRINGS_CARDINALITY:
975 case InferenceId::STRINGS_I_CYCLE_E:
976 case InferenceId::STRINGS_I_CYCLE:
977 case InferenceId::STRINGS_INFER_EMP:
978 case InferenceId::STRINGS_RE_DELTA:
979 case InferenceId::STRINGS_RE_DELTA_CONF:
980 case InferenceId::STRINGS_RE_DERIVE:
981 case InferenceId::STRINGS_FLOOP:
982 case InferenceId::STRINGS_FLOOP_CONFLICT:
983 case InferenceId::STRINGS_DEQ_NORM_EMP:
984 case InferenceId::STRINGS_CTN_TRANS:
985 case InferenceId::STRINGS_CTN_DECOMPOSE:
986 default:
987 // do nothing, these will be converted to THEORY_INFERENCE below since the
988 // rule is unknown.
989 break;
990 }
991
992 // now see if we would succeed with the checker-to-try
993 bool success = false;
994 if (ps.d_rule != PfRule::UNKNOWN)
995 {
996 Trace("strings-ipc") << "For " << infer << ", try proof rule " << ps.d_rule
997 << "...";
998 Assert(ps.d_rule != PfRule::UNKNOWN);
999 Node pconc = psb.tryStep(ps.d_rule, ps.d_children, ps.d_args);
1000 if (pconc.isNull() || pconc != conc)
1001 {
1002 Trace("strings-ipc") << "failed, pconc is " << pconc << " (expected "
1003 << conc << ")" << std::endl;
1004 ps.d_rule = PfRule::UNKNOWN;
1005 }
1006 else
1007 {
1008 // successfully set up a single step proof in ps
1009 success = true;
1010 Trace("strings-ipc") << "success!" << std::endl;
1011 }
1012 }
1013 else if (useBuffer)
1014 {
1015 // successfully set up a multi step proof in psb
1016 success = true;
1017 }
1018 else
1019 {
1020 Trace("strings-ipc") << "For " << infer << " " << conc
1021 << ", no proof rule, failed" << std::endl;
1022 }
1023 if (!success)
1024 {
1025 // debug print
1026 if (Trace.isOn("strings-ipc-fail"))
1027 {
1028 Trace("strings-ipc-fail")
1029 << "InferProofCons::convert: Failed " << infer
1030 << (isRev ? " :rev " : " ") << conc << std::endl;
1031 for (const Node& ec : exp)
1032 {
1033 Trace("strings-ipc-fail") << " e: " << ec << std::endl;
1034 }
1035 }
1036 // untrustworthy conversion, the argument of THEORY_INFERENCE is its
1037 // conclusion
1038 ps.d_args.clear();
1039 ps.d_args.push_back(conc);
1040 Node t = builtin::BuiltinProofRuleChecker::mkTheoryIdNode(THEORY_STRINGS);
1041 ps.d_args.push_back(t);
1042 // use the trust rule
1043 ps.d_rule = PfRule::THEORY_INFERENCE;
1044 }
1045 if (Trace.isOn("strings-ipc-debug"))
1046 {
1047 if (useBuffer)
1048 {
1049 Trace("strings-ipc-debug")
1050 << "InferProofCons::convert returned buffer with "
1051 << psb.getNumSteps() << " steps:" << std::endl;
1052 const std::vector<std::pair<Node, ProofStep>>& steps = psb.getSteps();
1053 for (const std::pair<Node, ProofStep>& step : steps)
1054 {
1055 Trace("strings-ipc-debug")
1056 << "- " << step.first << " via " << step.second << std::endl;
1057 }
1058 }
1059 else
1060 {
1061 Trace("strings-ipc-debug")
1062 << "InferProofCons::convert returned " << ps << std::endl;
1063 }
1064 }
1065 }
1066
1067 bool InferProofCons::convertLengthPf(Node lenReq,
1068 const std::vector<Node>& lenExp,
1069 TheoryProofStepBuffer& psb)
1070 {
1071 for (const Node& le : lenExp)
1072 {
1073 if (lenReq == le)
1074 {
1075 return true;
1076 }
1077 }
1078 Trace("strings-ipc-len") << "Must explain " << lenReq << " by " << lenExp
1079 << std::endl;
1080 for (const Node& le : lenExp)
1081 {
1082 // probably rewrites to it?
1083 std::vector<Node> exp;
1084 if (psb.applyPredTransform(le, lenReq, exp))
1085 {
1086 Trace("strings-ipc-len") << "...success by rewrite" << std::endl;
1087 return true;
1088 }
1089 // maybe x != "" => len(x) != 0
1090 std::vector<Node> children;
1091 children.push_back(le);
1092 std::vector<Node> args;
1093 Node res = psb.tryStep(PfRule::STRING_LENGTH_NON_EMPTY, children, args);
1094 if (res == lenReq)
1095 {
1096 Trace("strings-ipc-len") << "...success by LENGTH_NON_EMPTY" << std::endl;
1097 return true;
1098 }
1099 }
1100 Trace("strings-ipc-len") << "...failed" << std::endl;
1101 return false;
1102 }
1103
1104 Node InferProofCons::convertTrans(Node eqa,
1105 Node eqb,
1106 TheoryProofStepBuffer& psb)
1107 {
1108 if (eqa.getKind() != EQUAL || eqb.getKind() != EQUAL)
1109 {
1110 return Node::null();
1111 }
1112 for (uint32_t i = 0; i < 2; i++)
1113 {
1114 Node eqaSym = i == 0 ? eqa[1].eqNode(eqa[0]) : eqa;
1115 for (uint32_t j = 0; j < 2; j++)
1116 {
1117 Node eqbSym = j == 0 ? eqb : eqb[1].eqNode(eqb[1]);
1118 if (eqa[i] == eqb[j])
1119 {
1120 std::vector<Node> children;
1121 children.push_back(eqaSym);
1122 children.push_back(eqbSym);
1123 return psb.tryStep(PfRule::TRANS, children, {});
1124 }
1125 }
1126 }
1127 return Node::null();
1128 }
1129
1130 std::shared_ptr<ProofNode> InferProofCons::getProofFor(Node fact)
1131 {
1132 // get the inference
1133 NodeInferInfoMap::iterator it = d_lazyFactMap.find(fact);
1134 if (it == d_lazyFactMap.end())
1135 {
1136 Node factSym = CDProof::getSymmFact(fact);
1137 if (!factSym.isNull())
1138 {
1139 // Use the symmetric fact. There is no need to explictly make a
1140 // SYMM proof, as this is handled by CDProof::getProofFor below.
1141 it = d_lazyFactMap.find(factSym);
1142 }
1143 }
1144 AlwaysAssert(it != d_lazyFactMap.end());
1145 std::shared_ptr<InferInfo> ii = (*it).second;
1146 Assert(ii->d_conc == fact);
1147 // make a placeholder proof using STRINGS_INFERENCE, which is reconstructed
1148 // during post-process
1149 CDProof pf(d_pnm);
1150 std::vector<Node> args;
1151 packArgs(ii->d_conc, ii->getId(), ii->d_idRev, ii->d_premises, args);
1152 // must flatten
1153 std::vector<Node> exp;
1154 for (const Node& ec : ii->d_premises)
1155 {
1156 utils::flattenOp(AND, ec, exp);
1157 }
1158 pf.addStep(fact, PfRule::STRING_INFERENCE, exp, args);
1159 return pf.getProofFor(fact);
1160 }
1161
1162 std::string InferProofCons::identify() const
1163 {
1164 return "strings::InferProofCons";
1165 }
1166
1167 bool InferProofCons::purifyCoreSubstitution(Node& tgt,
1168 std::vector<Node>& children,
1169 TheoryProofStepBuffer& psb,
1170 bool concludeTgtNew)
1171 {
1172 // collect the terms to purify, which are the LHS of the substitution
1173 std::unordered_set<Node> termsToPurify;
1174 for (const Node& nc : children)
1175 {
1176 Assert(nc.getKind() == EQUAL && nc[0].getType().isStringLike());
1177 termsToPurify.insert(nc[0]);
1178 }
1179 // now, purify each of the children of the substitution
1180 for (size_t i = 0, nchild = children.size(); i < nchild; i++)
1181 {
1182 Node pnc = purifyCorePredicate(children[i], true, psb, termsToPurify);
1183 if (pnc.isNull())
1184 {
1185 return false;
1186 }
1187 if (children[i] != pnc)
1188 {
1189 Trace("strings-ipc-pure-subs")
1190 << "Converted: " << children[i] << " to " << pnc << std::endl;
1191 children[i] = pnc;
1192 }
1193 // we now should have a substitution with only atomic terms
1194 Assert(children[i][0].getNumChildren() == 0);
1195 }
1196 // now, purify the target predicate
1197 tgt = purifyCorePredicate(tgt, concludeTgtNew, psb, termsToPurify);
1198 return !tgt.isNull();
1199 }
1200
1201 Node InferProofCons::purifyCorePredicate(
1202 Node lit,
1203 bool concludeNew,
1204 TheoryProofStepBuffer& psb,
1205 std::unordered_set<Node>& termsToPurify)
1206 {
1207 bool pol = lit.getKind() != NOT;
1208 Node atom = pol ? lit : lit[0];
1209 if (atom.getKind() != EQUAL || !atom[0].getType().isStringLike())
1210 {
1211 // we only purify string (dis)equalities
1212 return lit;
1213 }
1214 // purify both sides of the equality
1215 std::vector<Node> pcs;
1216 bool childChanged = false;
1217 for (const Node& lc : atom)
1218 {
1219 Node plc = purifyCoreTerm(lc, termsToPurify);
1220 childChanged = childChanged || plc != lc;
1221 pcs.push_back(plc);
1222 }
1223 if (!childChanged)
1224 {
1225 return lit;
1226 }
1227 NodeManager* nm = NodeManager::currentNM();
1228 Node newLit = nm->mkNode(EQUAL, pcs);
1229 if (!pol)
1230 {
1231 newLit = newLit.notNode();
1232 }
1233 Assert(lit != newLit);
1234 // prove by transformation, should always succeed
1235 if (!psb.applyPredTransform(
1236 concludeNew ? lit : newLit, concludeNew ? newLit : lit, {}))
1237 {
1238 // failed, return null
1239 return Node::null();
1240 }
1241 return newLit;
1242 }
1243
1244 Node InferProofCons::purifyCoreTerm(Node n,
1245 std::unordered_set<Node>& termsToPurify)
1246 {
1247 Assert(n.getType().isStringLike());
1248 if (n.getNumChildren() == 0)
1249 {
1250 return n;
1251 }
1252 NodeManager* nm = NodeManager::currentNM();
1253 if (n.getKind() == STRING_CONCAT)
1254 {
1255 std::vector<Node> pcs;
1256 for (const Node& nc : n)
1257 {
1258 pcs.push_back(purifyCoreTerm(nc, termsToPurify));
1259 }
1260 return nm->mkNode(STRING_CONCAT, pcs);
1261 }
1262 if (termsToPurify.find(n) == termsToPurify.end())
1263 {
1264 // did not need to purify
1265 return n;
1266 }
1267 SkolemManager* sm = nm->getSkolemManager();
1268 Node k = sm->mkPurifySkolem(n, "k");
1269 return k;
1270 }
1271
1272 } // namespace strings
1273 } // namespace theory
1274 } // namespace cvc5