5eba8663ab695ecef0b7f789fe5f2a9e88cd549c
[cvc5.git] / src / theory / strings / infer_proof_cons.cpp
1 /******************************************************************************
2 * Top contributors (to current version):
3 * Andrew Reynolds, Gereon Kremer, Aina Niemetz
4 *
5 * This file is part of the cvc5 project.
6 *
7 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8 * in the top-level source directory and their institutional affiliations.
9 * All rights reserved. See the file COPYING in the top-level source
10 * directory for licensing information.
11 * ****************************************************************************
12 *
13 * Implementation of inference to proof conversion.
14 */
15
16 #include "theory/strings/infer_proof_cons.h"
17
18 #include "expr/skolem_manager.h"
19 #include "options/smt_options.h"
20 #include "options/strings_options.h"
21 #include "proof/proof_node_manager.h"
22 #include "theory/builtin/proof_checker.h"
23 #include "theory/rewriter.h"
24 #include "theory/strings/regexp_operation.h"
25 #include "theory/strings/theory_strings_utils.h"
26 #include "util/statistics_registry.h"
27
28 using namespace cvc5::kind;
29
30 namespace cvc5 {
31 namespace theory {
32 namespace strings {
33
34 InferProofCons::InferProofCons(context::Context* c,
35 ProofNodeManager* pnm,
36 SequencesStatistics& statistics)
37 : d_pnm(pnm), d_lazyFactMap(c), d_statistics(statistics)
38 {
39 Assert(d_pnm != nullptr);
40 }
41
42 void InferProofCons::notifyFact(const InferInfo& ii)
43 {
44 Node fact = ii.d_conc;
45 Trace("strings-ipc-debug")
46 << "InferProofCons::notifyFact: " << ii << std::endl;
47 if (d_lazyFactMap.find(fact) != d_lazyFactMap.end())
48 {
49 Trace("strings-ipc-debug") << "...duplicate!" << std::endl;
50 return;
51 }
52 Node symFact = CDProof::getSymmFact(fact);
53 if (!symFact.isNull() && d_lazyFactMap.find(symFact) != d_lazyFactMap.end())
54 {
55 Trace("strings-ipc-debug") << "...duplicate (sym)!" << std::endl;
56 return;
57 }
58 std::shared_ptr<InferInfo> iic = std::make_shared<InferInfo>(ii);
59 d_lazyFactMap.insert(ii.d_conc, iic);
60 }
61
62 void InferProofCons::notifyLemma(const InferInfo& ii)
63 {
64 d_lazyFactMap[ii.d_conc] = std::make_shared<InferInfo>(ii);
65 }
66
67 bool InferProofCons::addProofTo(CDProof* pf,
68 Node conc,
69 InferenceId infer,
70 bool isRev,
71 const std::vector<Node>& exp)
72 {
73 // now go back and convert it to proof steps and add to proof
74 bool useBuffer = false;
75 ProofStep ps;
76 TheoryProofStepBuffer psb(pf->getManager()->getChecker());
77 // run the conversion
78 convert(infer, isRev, conc, exp, ps, psb, useBuffer);
79 // make the proof based on the step or the buffer
80 if (useBuffer)
81 {
82 if (!pf->addSteps(psb))
83 {
84 return false;
85 }
86 }
87 else
88 {
89 if (!pf->addStep(conc, ps))
90 {
91 return false;
92 }
93 }
94 return true;
95 }
96
97 void InferProofCons::packArgs(Node conc,
98 InferenceId infer,
99 bool isRev,
100 const std::vector<Node>& exp,
101 std::vector<Node>& args)
102 {
103 args.push_back(conc);
104 args.push_back(mkInferenceIdNode(infer));
105 args.push_back(NodeManager::currentNM()->mkConst(isRev));
106 // The vector exp is stored as arguments; its flatten form are premises. We
107 // need both since the grouping of exp is important, e.g. { (and a b), c }
108 // is different from { a, b, c } in the convert routine, since positions
109 // of formulas in exp have special meaning.
110 args.insert(args.end(), exp.begin(), exp.end());
111 }
112
113 bool InferProofCons::unpackArgs(const std::vector<Node>& args,
114 Node& conc,
115 InferenceId& infer,
116 bool& isRev,
117 std::vector<Node>& exp)
118 {
119 Assert(args.size() >= 3);
120 conc = args[0];
121 if (!getInferenceId(args[1], infer))
122 {
123 return false;
124 }
125 isRev = args[2].getConst<bool>();
126 exp.insert(exp.end(), args.begin() + 3, args.end());
127 return true;
128 }
129
130 void InferProofCons::convert(InferenceId infer,
131 bool isRev,
132 Node conc,
133 const std::vector<Node>& exp,
134 ProofStep& ps,
135 TheoryProofStepBuffer& psb,
136 bool& useBuffer)
137 {
138 // by default, don't use the buffer
139 useBuffer = false;
140 // Must flatten children with respect to AND to be ready to explain.
141 // We store the index where each flattened vector begins, since some
142 // explanations are grouped together using AND.
143 std::vector<size_t> startExpIndex;
144 for (const Node& ec : exp)
145 {
146 // store the index in the flattened vector
147 startExpIndex.push_back(ps.d_children.size());
148 utils::flattenOp(AND, ec, ps.d_children);
149 }
150 // debug print
151 if (Trace.isOn("strings-ipc-debug"))
152 {
153 Trace("strings-ipc-debug") << "InferProofCons::convert: " << infer
154 << (isRev ? " :rev " : " ") << conc << std::endl;
155 for (const Node& ec : ps.d_children)
156 {
157 Trace("strings-ipc-debug") << " e: " << ec << std::endl;
158 }
159 }
160 // try to find a set of proof steps to incorporate into the buffer
161 psb.clear();
162 NodeManager* nm = NodeManager::currentNM();
163 Node nodeIsRev = nm->mkConst(isRev);
164 switch (infer)
165 {
166 // ========================== equal by substitution+rewriting
167 case InferenceId::STRINGS_I_NORM_S:
168 case InferenceId::STRINGS_I_CONST_MERGE:
169 case InferenceId::STRINGS_I_NORM:
170 case InferenceId::STRINGS_LEN_NORM:
171 case InferenceId::STRINGS_NORMAL_FORM:
172 case InferenceId::STRINGS_CODE_PROXY:
173 {
174 std::vector<Node> pcs = ps.d_children;
175 Node pconc = conc;
176 // purify core substitution proves conc from pconc if necessary,
177 // we apply MACRO_SR_PRED_INTRO to prove pconc
178 if (purifyCoreSubstitution(pconc, pcs, psb, false))
179 {
180 if (psb.applyPredIntro(pconc, pcs))
181 {
182 useBuffer = true;
183 }
184 }
185 }
186 break;
187 // ========================== substitution + rewriting
188 case InferenceId::STRINGS_RE_NF_CONFLICT:
189 case InferenceId::STRINGS_EXTF:
190 case InferenceId::STRINGS_EXTF_N:
191 case InferenceId::STRINGS_EXTF_D:
192 case InferenceId::STRINGS_EXTF_D_N:
193 case InferenceId::STRINGS_I_CONST_CONFLICT:
194 case InferenceId::STRINGS_UNIT_CONST_CONFLICT:
195 {
196 if (!ps.d_children.empty())
197 {
198 std::vector<Node> exps(ps.d_children.begin(), ps.d_children.end() - 1);
199 Node src = ps.d_children[ps.d_children.size() - 1];
200 if (psb.applyPredTransform(src, conc, exps))
201 {
202 useBuffer = true;
203 }
204 }
205 else
206 {
207 // use the predicate version?
208 ps.d_args.push_back(conc);
209 ps.d_rule = PfRule::MACRO_SR_PRED_INTRO;
210 }
211 }
212 break;
213 // ========================== rewrite pred
214 case InferenceId::STRINGS_EXTF_EQ_REW:
215 {
216 // the last child is the predicate we are operating on, move to front
217 Node src = ps.d_children[ps.d_children.size() - 1];
218 std::vector<Node> expe(ps.d_children.begin(), ps.d_children.end() - 1);
219 // start with a default rewrite
220 Node mainEqSRew = psb.applyPredElim(src, expe);
221 if (mainEqSRew == conc)
222 {
223 useBuffer = true;
224 break;
225 }
226 // may need the "extended equality rewrite"
227 Node mainEqSRew2 = psb.applyPredElim(mainEqSRew,
228 {},
229 MethodId::SB_DEFAULT,
230 MethodId::SBA_SEQUENTIAL,
231 MethodId::RW_REWRITE_EQ_EXT);
232 if (mainEqSRew2 == conc)
233 {
234 useBuffer = true;
235 break;
236 }
237 // rewrite again with default rewriter
238 Node mainEqSRew3 = psb.applyPredElim(mainEqSRew2, {});
239 useBuffer = (mainEqSRew3 == conc);
240 }
241 break;
242 // ========================== substitution+rewriting, CONCAT_EQ, ...
243 case InferenceId::STRINGS_F_CONST:
244 case InferenceId::STRINGS_F_UNIFY:
245 case InferenceId::STRINGS_F_ENDPOINT_EMP:
246 case InferenceId::STRINGS_F_ENDPOINT_EQ:
247 case InferenceId::STRINGS_F_NCTN:
248 case InferenceId::STRINGS_N_EQ_CONF:
249 case InferenceId::STRINGS_N_CONST:
250 case InferenceId::STRINGS_N_UNIFY:
251 case InferenceId::STRINGS_N_ENDPOINT_EMP:
252 case InferenceId::STRINGS_N_ENDPOINT_EQ:
253 case InferenceId::STRINGS_N_NCTN:
254 case InferenceId::STRINGS_SSPLIT_CST_PROP:
255 case InferenceId::STRINGS_SSPLIT_VAR_PROP:
256 case InferenceId::STRINGS_SSPLIT_CST:
257 case InferenceId::STRINGS_SSPLIT_VAR:
258 {
259 Trace("strings-ipc-core") << "Generate core rule for " << infer
260 << " (rev=" << isRev << ")" << std::endl;
261 // All of the above inferences have the form:
262 // (explanation for why t and s have the same prefix/suffix) ^
263 // t = s ^
264 // (length constraint)?
265 // We call t=s the "main equality" below. The length constraint is
266 // optional, which we split on below.
267 size_t nchild = ps.d_children.size();
268 size_t mainEqIndex = 0;
269 bool mainEqIndexSet = false;
270 // the length constraint
271 std::vector<Node> lenConstraint;
272 // these inferences have a length constraint as the last explain
273 if (infer == InferenceId::STRINGS_N_UNIFY || infer == InferenceId::STRINGS_F_UNIFY
274 || infer == InferenceId::STRINGS_SSPLIT_CST || infer == InferenceId::STRINGS_SSPLIT_VAR
275 || infer == InferenceId::STRINGS_SSPLIT_VAR_PROP
276 || infer == InferenceId::STRINGS_SSPLIT_CST_PROP)
277 {
278 if (exp.size() >= 2)
279 {
280 Assert(exp.size() <= startExpIndex.size());
281 // The index of the "main" equality is the last equality before
282 // the length explanation.
283 mainEqIndex = startExpIndex[exp.size() - 1] - 1;
284 mainEqIndexSet = true;
285 // the remainder is the length constraint
286 lenConstraint.insert(lenConstraint.end(),
287 ps.d_children.begin() + mainEqIndex + 1,
288 ps.d_children.end());
289 }
290 }
291 else if (nchild >= 1)
292 {
293 // The index of the main equality is the last child.
294 mainEqIndex = nchild - 1;
295 mainEqIndexSet = true;
296 }
297 Node mainEq;
298 if (mainEqIndexSet)
299 {
300 mainEq = ps.d_children[mainEqIndex];
301 Trace("strings-ipc-core") << "Main equality " << mainEq << " at index "
302 << mainEqIndex << std::endl;
303 }
304 if (mainEq.isNull() || mainEq.getKind() != EQUAL)
305 {
306 Trace("strings-ipc-core")
307 << "...failed to find main equality" << std::endl;
308 break;
309 }
310 // apply MACRO_SR_PRED_ELIM using equalities up to the main eq
311 // we purify the core substitution
312 std::vector<Node> pcsr(ps.d_children.begin(),
313 ps.d_children.begin() + mainEqIndex);
314 Node pmainEq = mainEq;
315 // we transform mainEq to pmainEq and then use this as the first
316 // argument to MACRO_SR_PRED_ELIM.
317 if (!purifyCoreSubstitution(pmainEq, pcsr, psb, true))
318 {
319 break;
320 }
321 std::vector<Node> childrenSRew;
322 childrenSRew.push_back(pmainEq);
323 childrenSRew.insert(childrenSRew.end(), pcsr.begin(), pcsr.end());
324 // now, conclude the proper equality
325 Node mainEqSRew =
326 psb.tryStep(PfRule::MACRO_SR_PRED_ELIM, childrenSRew, {});
327 if (CDProof::isSame(mainEqSRew, pmainEq))
328 {
329 Trace("strings-ipc-core") << "...undo step" << std::endl;
330 // the rule added above was not necessary
331 psb.popStep();
332 }
333 else if (mainEqSRew == conc)
334 {
335 Trace("strings-ipc-core") << "...success after rewrite!" << std::endl;
336 useBuffer = true;
337 break;
338 }
339 Trace("strings-ipc-core")
340 << "Main equality after subs+rewrite " << mainEqSRew << std::endl;
341 // now, apply CONCAT_EQ to get the remainder
342 std::vector<Node> childrenCeq;
343 childrenCeq.push_back(mainEqSRew);
344 std::vector<Node> argsCeq;
345 argsCeq.push_back(nodeIsRev);
346 Node mainEqCeq = psb.tryStep(PfRule::CONCAT_EQ, childrenCeq, argsCeq);
347 Trace("strings-ipc-core")
348 << "Main equality after CONCAT_EQ " << mainEqCeq << std::endl;
349 if (mainEqCeq.isNull() || mainEqCeq.getKind() != EQUAL)
350 {
351 // fail
352 break;
353 }
354 else if (mainEqCeq == mainEqSRew)
355 {
356 Trace("strings-ipc-core") << "...undo step" << std::endl;
357 // not necessary, probably first component of equality
358 psb.popStep();
359 }
360 // Now, mainEqCeq is an equality t ++ ... == s ++ ... where the
361 // inference involved t and s.
362 if (infer == InferenceId::STRINGS_N_ENDPOINT_EQ
363 || infer == InferenceId::STRINGS_N_ENDPOINT_EMP
364 || infer == InferenceId::STRINGS_F_ENDPOINT_EQ
365 || infer == InferenceId::STRINGS_F_ENDPOINT_EMP)
366 {
367 // Should be equal to conclusion already, or rewrite to it.
368 // Notice that this step is necessary to handle the "rproc"
369 // optimization in processSimpleNEq. Alternatively, this could
370 // possibly be done by CONCAT_EQ with !isRev.
371 std::vector<Node> cexp;
372 if (psb.applyPredTransform(mainEqCeq,
373 conc,
374 cexp,
375 MethodId::SB_DEFAULT,
376 MethodId::SBA_SEQUENTIAL,
377 MethodId::RW_REWRITE_EQ_EXT))
378 {
379 Trace("strings-ipc-core") << "Transformed to " << conc
380 << " via pred transform" << std::endl;
381 // success
382 useBuffer = true;
383 Trace("strings-ipc-core") << "...success!" << std::endl;
384 }
385 // Otherwise, note that EMP rules conclude ti = "" where
386 // t1 ++ ... ++ tn == "". However, these are very rarely applied, let
387 // alone for 2+ children. This case is intentionally unhandled here.
388 }
389 else if (infer == InferenceId::STRINGS_N_CONST || infer == InferenceId::STRINGS_F_CONST
390 || infer == InferenceId::STRINGS_N_EQ_CONF)
391 {
392 // should be a constant conflict
393 std::vector<Node> childrenC;
394 childrenC.push_back(mainEqCeq);
395 std::vector<Node> argsC;
396 argsC.push_back(nodeIsRev);
397 Node mainEqC = psb.tryStep(PfRule::CONCAT_CONFLICT, childrenC, argsC);
398 if (mainEqC == conc)
399 {
400 useBuffer = true;
401 Trace("strings-ipc-core") << "...success!" << std::endl;
402 }
403 }
404 else if (infer == InferenceId::STRINGS_F_NCTN
405 || infer == InferenceId::STRINGS_N_NCTN)
406 {
407 // May require extended equality rewrite, applied after the rewrite
408 // above. Notice we need both in sequence since ext equality rewriting
409 // is not recursive.
410 std::vector<Node> argsERew;
411 addMethodIds(argsERew,
412 MethodId::SB_DEFAULT,
413 MethodId::SBA_SEQUENTIAL,
414 MethodId::RW_REWRITE_EQ_EXT);
415 Node mainEqERew =
416 psb.tryStep(PfRule::MACRO_SR_PRED_ELIM, {mainEqCeq}, argsERew);
417 if (mainEqERew == conc)
418 {
419 useBuffer = true;
420 Trace("strings-ipc-core") << "...success!" << std::endl;
421 }
422 }
423 else
424 {
425 std::vector<Node> tvec;
426 std::vector<Node> svec;
427 utils::getConcat(mainEqCeq[0], tvec);
428 utils::getConcat(mainEqCeq[1], svec);
429 Node t0 = tvec[isRev ? tvec.size() - 1 : 0];
430 Node s0 = svec[isRev ? svec.size() - 1 : 0];
431 bool applySym = false;
432 // may need to apply symmetry
433 if ((infer == InferenceId::STRINGS_SSPLIT_CST
434 || infer == InferenceId::STRINGS_SSPLIT_CST_PROP)
435 && t0.isConst())
436 {
437 Assert(!s0.isConst());
438 applySym = true;
439 std::swap(t0, s0);
440 }
441 if (infer == InferenceId::STRINGS_N_UNIFY || infer == InferenceId::STRINGS_F_UNIFY)
442 {
443 if (conc.getKind() != EQUAL)
444 {
445 break;
446 }
447 // one side should match, the other side could be a split constant
448 if (conc[0] != t0 && conc[1] != s0)
449 {
450 applySym = true;
451 std::swap(t0, s0);
452 }
453 Assert(conc[0].isConst() == t0.isConst());
454 Assert(conc[1].isConst() == s0.isConst());
455 }
456 PfRule rule = PfRule::UNKNOWN;
457 // the form of the required length constraint expected by the proof
458 Node lenReq;
459 bool lenSuccess = false;
460 if (infer == InferenceId::STRINGS_N_UNIFY || infer == InferenceId::STRINGS_F_UNIFY)
461 {
462 // the required premise for unify is always len(x) = len(y),
463 // however the explanation may not be literally this. Thus, we
464 // need to reconstruct a proof from the given explanation.
465 // it should be the case that lenConstraint => lenReq.
466 // We use terms in the conclusion equality, not t0, s0 here.
467 lenReq = nm->mkNode(STRING_LENGTH, conc[0])
468 .eqNode(nm->mkNode(STRING_LENGTH, conc[1]));
469 lenSuccess = convertLengthPf(lenReq, lenConstraint, psb);
470 rule = PfRule::CONCAT_UNIFY;
471 }
472 else if (infer == InferenceId::STRINGS_SSPLIT_VAR)
473 {
474 // it should be the case that lenConstraint => lenReq
475 lenReq = nm->mkNode(STRING_LENGTH, t0)
476 .eqNode(nm->mkNode(STRING_LENGTH, s0))
477 .notNode();
478 lenSuccess = convertLengthPf(lenReq, lenConstraint, psb);
479 rule = PfRule::CONCAT_SPLIT;
480 }
481 else if (infer == InferenceId::STRINGS_SSPLIT_CST)
482 {
483 // it should be the case that lenConstraint => lenReq
484 lenReq = nm->mkNode(STRING_LENGTH, t0)
485 .eqNode(nm->mkConst(Rational(0)))
486 .notNode();
487 lenSuccess = convertLengthPf(lenReq, lenConstraint, psb);
488 rule = PfRule::CONCAT_CSPLIT;
489 }
490 else if (infer == InferenceId::STRINGS_SSPLIT_VAR_PROP)
491 {
492 // it should be the case that lenConstraint => lenReq
493 for (unsigned r = 0; r < 2; r++)
494 {
495 lenReq = nm->mkNode(GT,
496 nm->mkNode(STRING_LENGTH, t0),
497 nm->mkNode(STRING_LENGTH, s0));
498 if (convertLengthPf(lenReq, lenConstraint, psb))
499 {
500 lenSuccess = true;
501 break;
502 }
503 if (r == 0)
504 {
505 // may be the other direction
506 applySym = true;
507 std::swap(t0, s0);
508 }
509 }
510 rule = PfRule::CONCAT_LPROP;
511 }
512 else if (infer == InferenceId::STRINGS_SSPLIT_CST_PROP)
513 {
514 // it should be the case that lenConstraint => lenReq
515 lenReq = nm->mkNode(STRING_LENGTH, t0)
516 .eqNode(nm->mkConst(Rational(0)))
517 .notNode();
518 lenSuccess = convertLengthPf(lenReq, lenConstraint, psb);
519 rule = PfRule::CONCAT_CPROP;
520 }
521 if (!lenSuccess)
522 {
523 Trace("strings-ipc-core")
524 << "...failed due to length constraint" << std::endl;
525 break;
526 }
527 // apply symmetry if necessary
528 if (applySym)
529 {
530 std::vector<Node> childrenSymm;
531 childrenSymm.push_back(mainEqCeq);
532 // note this explicit step may not be necessary
533 mainEqCeq = psb.tryStep(PfRule::SYMM, childrenSymm, {});
534 Trace("strings-ipc-core")
535 << "Main equality after SYMM " << mainEqCeq << std::endl;
536 }
537 if (rule != PfRule::UNKNOWN)
538 {
539 Trace("strings-ipc-core")
540 << "Core rule length requirement is " << lenReq << std::endl;
541 // apply the given rule
542 std::vector<Node> childrenMain;
543 childrenMain.push_back(mainEqCeq);
544 childrenMain.push_back(lenReq);
545 std::vector<Node> argsMain;
546 argsMain.push_back(nodeIsRev);
547 Node mainEqMain = psb.tryStep(rule, childrenMain, argsMain);
548 Trace("strings-ipc-core") << "Main equality after " << rule << " "
549 << mainEqMain << std::endl;
550 if (mainEqMain == mainEqCeq)
551 {
552 Trace("strings-ipc-core") << "...undo step" << std::endl;
553 // not necessary, probably first component of equality
554 psb.popStep();
555 }
556 // either equal or rewrites to it
557 std::vector<Node> cexp;
558 if (psb.applyPredTransform(mainEqMain, conc, cexp))
559 {
560 // requires that length success is also true
561 useBuffer = true;
562 Trace("strings-ipc-core") << "...success" << std::endl;
563 }
564 else
565 {
566 Trace("strings-ipc-core") << "...fail" << std::endl;
567 }
568 }
569 else
570 {
571 // should always have given a rule to try above
572 Assert(false) << "No reconstruction rule given for " << infer;
573 }
574 }
575 }
576 break;
577 // ========================== Disequalities
578 case InferenceId::STRINGS_DEQ_DISL_FIRST_CHAR_STRING_SPLIT:
579 case InferenceId::STRINGS_DEQ_DISL_STRINGS_SPLIT:
580 {
581 if (conc.getKind() != AND || conc.getNumChildren() != 2
582 || conc[0].getKind() != EQUAL || !conc[0][0].getType().isStringLike()
583 || conc[1].getKind() != EQUAL
584 || conc[1][0].getKind() != STRING_LENGTH)
585 {
586 Trace("strings-ipc-deq") << "malformed application" << std::endl;
587 Assert(false) << "unexpected conclusion " << conc << " for " << infer;
588 }
589 else
590 {
591 Node lenReq =
592 nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, conc[0][0]), conc[1][1]);
593 Trace("strings-ipc-deq")
594 << "length requirement is " << lenReq << std::endl;
595 if (convertLengthPf(lenReq, ps.d_children, psb))
596 {
597 Trace("strings-ipc-deq") << "...success length" << std::endl;
598 // make the proof
599 std::vector<Node> childrenMain;
600 childrenMain.push_back(lenReq);
601 std::vector<Node> argsMain;
602 argsMain.push_back(nodeIsRev);
603 Node mainConc =
604 psb.tryStep(PfRule::STRING_DECOMPOSE, childrenMain, argsMain);
605 Trace("strings-ipc-deq")
606 << "...main conclusion is " << mainConc << std::endl;
607 useBuffer = (mainConc == conc);
608 Trace("strings-ipc-deq")
609 << "...success is " << useBuffer << std::endl;
610 }
611 else
612 {
613 Trace("strings-ipc-deq") << "...fail length" << std::endl;
614 }
615 }
616 }
617 break;
618 // ========================== Boolean split
619 case InferenceId::STRINGS_CARD_SP:
620 case InferenceId::STRINGS_LEN_SPLIT:
621 case InferenceId::STRINGS_LEN_SPLIT_EMP:
622 case InferenceId::STRINGS_DEQ_DISL_EMP_SPLIT:
623 case InferenceId::STRINGS_DEQ_DISL_FIRST_CHAR_EQ_SPLIT:
624 case InferenceId::STRINGS_DEQ_STRINGS_EQ:
625 case InferenceId::STRINGS_DEQ_LENS_EQ:
626 case InferenceId::STRINGS_DEQ_LENGTH_SP:
627 {
628 if (conc.getKind() != OR)
629 {
630 // This should never happen. If it does, we resort to using
631 // THEORY_INFERENCE below (in production mode).
632 Assert(false) << "Expected OR conclusion for " << infer;
633 }
634 else
635 {
636 ps.d_rule = PfRule::SPLIT;
637 Assert(ps.d_children.empty());
638 ps.d_args.push_back(conc[0]);
639 }
640 }
641 break;
642 // ========================== Regular expression unfolding
643 case InferenceId::STRINGS_RE_UNFOLD_POS:
644 case InferenceId::STRINGS_RE_UNFOLD_NEG:
645 {
646 Assert (!ps.d_children.empty());
647 size_t nchild = ps.d_children.size();
648 Node mem = ps.d_children[nchild-1];
649 if (nchild>1)
650 {
651 // if more than one, apply MACRO_SR_PRED_ELIM
652 std::vector<Node> tcs;
653 tcs.insert(tcs.end(),
654 ps.d_children.begin(),
655 ps.d_children.begin() + (nchild-1));
656 mem = psb.applyPredElim(mem, tcs);
657 useBuffer = true;
658 }
659 PfRule r = PfRule::UNKNOWN;
660 if (mem.isNull())
661 {
662 // failed to eliminate above
663 Assert(false) << "Failed to apply MACRO_SR_PRED_ELIM for RE unfold";
664 useBuffer = false;
665 }
666 else if (infer == InferenceId::STRINGS_RE_UNFOLD_POS)
667 {
668 r = PfRule::RE_UNFOLD_POS;
669 }
670 else
671 {
672 r = PfRule::RE_UNFOLD_NEG;
673 // it may be an optimized form of concat simplification
674 Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
675 if (mem[0][1].getKind() == REGEXP_CONCAT)
676 {
677 size_t index;
678 Node reLen = RegExpOpr::getRegExpConcatFixed(mem[0][1], index);
679 // if we can find a fixed length for a component, use the optimized
680 // version
681 if (!reLen.isNull())
682 {
683 r = PfRule::RE_UNFOLD_NEG_CONCAT_FIXED;
684 }
685 }
686 }
687 if (useBuffer)
688 {
689 mem = psb.tryStep(r, {mem}, {});
690 // should match the conclusion
691 useBuffer = (mem==conc);
692 }
693 else
694 {
695 ps.d_rule = r;
696 }
697 }
698 break;
699 // ========================== Reduction
700 case InferenceId::STRINGS_CTN_POS:
701 case InferenceId::STRINGS_CTN_NEG_EQUAL:
702 {
703 if (ps.d_children.size() != 1)
704 {
705 break;
706 }
707 bool polarity = ps.d_children[0].getKind() != NOT;
708 Node atom = polarity ? ps.d_children[0] : ps.d_children[0][0];
709 std::vector<Node> args;
710 args.push_back(atom);
711 Node res = psb.tryStep(PfRule::STRING_EAGER_REDUCTION, {}, args);
712 if (res.isNull())
713 {
714 break;
715 }
716 // ite( contains(x,t), x = k1 ++ t ++ k2, x != t )
717 std::vector<Node> tiChildren;
718 tiChildren.push_back(ps.d_children[0]);
719 Node ctnt = psb.tryStep(
720 polarity ? PfRule::TRUE_INTRO : PfRule::FALSE_INTRO, tiChildren, {});
721 if (ctnt.isNull() || ctnt.getKind() != EQUAL)
722 {
723 break;
724 }
725 std::vector<Node> tchildren;
726 tchildren.push_back(ctnt);
727 // apply substitution { contains(x,t) -> true|false } and rewrite to get
728 // conclusion x = k1 ++ t ++ k2 or x != t.
729 if (psb.applyPredTransform(res, conc, tchildren))
730 {
731 useBuffer = true;
732 }
733 }
734 break;
735
736 case InferenceId::STRINGS_REDUCTION:
737 {
738 size_t nchild = conc.getNumChildren();
739 Node mainEq;
740 if (conc.getKind() == EQUAL)
741 {
742 mainEq = conc;
743 }
744 else if (conc.getKind() == AND && conc[nchild - 1].getKind() == EQUAL)
745 {
746 mainEq = conc[nchild - 1];
747 }
748 if (mainEq.isNull())
749 {
750 Trace("strings-ipc-red") << "Bad Reduction: " << conc << std::endl;
751 Assert(false) << "Unexpected reduction " << conc;
752 break;
753 }
754 std::vector<Node> argsRed;
755 // the left hand side of the last conjunct is the term we are reducing
756 argsRed.push_back(mainEq[0]);
757 Node red = psb.tryStep(PfRule::STRING_REDUCTION, {}, argsRed);
758 Trace("strings-ipc-red") << "Reduction : " << red << std::endl;
759 if (!red.isNull())
760 {
761 // either equal or rewrites to it
762 std::vector<Node> cexp;
763 if (psb.applyPredTransform(red, conc, cexp))
764 {
765 Trace("strings-ipc-red") << "...success!" << std::endl;
766 useBuffer = true;
767 }
768 else
769 {
770 Trace("strings-ipc-red") << "...failed to reduce" << std::endl;
771 }
772 }
773 }
774 break;
775 // ========================== code injectivity
776 case InferenceId::STRINGS_CODE_INJ:
777 {
778 ps.d_rule = PfRule::STRING_CODE_INJ;
779 Assert(conc.getKind() == OR && conc.getNumChildren() == 3
780 && conc[2].getKind() == EQUAL);
781 ps.d_args.push_back(conc[2][0]);
782 ps.d_args.push_back(conc[2][1]);
783 }
784 break;
785 // ========================== unit injectivity
786 case InferenceId::STRINGS_UNIT_INJ:
787 {
788 ps.d_rule = PfRule::STRING_SEQ_UNIT_INJ;
789 }
790 break;
791 // ========================== prefix conflict
792 case InferenceId::STRINGS_PREFIX_CONFLICT:
793 {
794 Trace("strings-ipc-prefix") << "Prefix conflict..." << std::endl;
795 std::vector<Node> eqs;
796 for (const Node& e : ps.d_children)
797 {
798 Kind ek = e.getKind();
799 if (ek == EQUAL)
800 {
801 Trace("strings-ipc-prefix") << "- equality : " << e << std::endl;
802 eqs.push_back(e);
803 }
804 else if (ek == STRING_IN_REGEXP)
805 {
806 // unfold it and extract the equality
807 std::vector<Node> children;
808 children.push_back(e);
809 std::vector<Node> args;
810 Node eunf = psb.tryStep(PfRule::RE_UNFOLD_POS, children, args);
811 Trace("strings-ipc-prefix")
812 << "--- " << e << " unfolds to " << eunf << std::endl;
813 if (eunf.isNull())
814 {
815 continue;
816 }
817 else if (eunf.getKind() == AND)
818 {
819 // equality is the first conjunct
820 std::vector<Node> childrenAE;
821 childrenAE.push_back(eunf);
822 std::vector<Node> argsAE;
823 argsAE.push_back(nm->mkConst(Rational(0)));
824 Node eunfAE = psb.tryStep(PfRule::AND_ELIM, childrenAE, argsAE);
825 Trace("strings-ipc-prefix")
826 << "--- and elim to " << eunfAE << std::endl;
827 if (eunfAE.isNull() || eunfAE.getKind() != EQUAL)
828 {
829 Assert(false)
830 << "Unexpected unfolded premise " << eunf << " for " << infer;
831 continue;
832 }
833 Trace("strings-ipc-prefix")
834 << "- equality : " << eunfAE << std::endl;
835 eqs.push_back(eunfAE);
836 }
837 else if (eunf.getKind() == EQUAL)
838 {
839 Trace("strings-ipc-prefix") << "- equality : " << eunf << std::endl;
840 eqs.push_back(eunf);
841 }
842 }
843 else
844 {
845 // not sure how to use this assumption
846 Assert(false) << "Unexpected premise " << e << " for " << infer;
847 }
848 }
849 if (eqs.empty())
850 {
851 break;
852 }
853 // connect via transitivity
854 Node curr = eqs[0];
855 for (size_t i = 1, esize = eqs.size(); i < esize; i++)
856 {
857 Node prev = curr;
858 curr = convertTrans(curr, eqs[1], psb);
859 if (curr.isNull())
860 {
861 break;
862 }
863 Trace("strings-ipc-prefix") << "- Via trans: " << curr << std::endl;
864 }
865 if (curr.isNull())
866 {
867 break;
868 }
869 Trace("strings-ipc-prefix")
870 << "- Possible conflicting equality : " << curr << std::endl;
871 std::vector<Node> emp;
872 Node concE = psb.applyPredElim(curr,
873 emp,
874 MethodId::SB_DEFAULT,
875 MethodId::SBA_SEQUENTIAL,
876 MethodId::RW_REWRITE_EQ_EXT);
877 Trace("strings-ipc-prefix")
878 << "- After pred elim: " << concE << std::endl;
879 if (concE == conc)
880 {
881 Trace("strings-ipc-prefix") << "...success!" << std::endl;
882 useBuffer = true;
883 }
884 }
885 break;
886 // ========================== regular expressions
887 case InferenceId::STRINGS_RE_INTER_INCLUDE:
888 case InferenceId::STRINGS_RE_INTER_CONF:
889 case InferenceId::STRINGS_RE_INTER_INFER:
890 {
891 std::vector<Node> reiExp;
892 std::vector<Node> reis;
893 std::vector<Node> reiChildren;
894 std::vector<Node> reiChildrenOrig;
895 Node x;
896 // make the regular expression intersection that summarizes all
897 // memberships in the explanation
898 for (const Node& c : ps.d_children)
899 {
900 bool polarity = c.getKind() != NOT;
901 Node catom = polarity ? c : c[0];
902 if (catom.getKind() != STRING_IN_REGEXP)
903 {
904 Assert(c.getKind() == EQUAL);
905 if (c.getKind() == EQUAL)
906 {
907 reiExp.push_back(c);
908 }
909 continue;
910 }
911 if (x.isNull())
912 {
913 // just take the first LHS; others should be equated to it by exp
914 x = catom[0];
915 }
916 Node rcurr =
917 polarity ? catom[1] : nm->mkNode(REGEXP_COMPLEMENT, catom[1]);
918 reis.push_back(rcurr);
919 Node mem = nm->mkNode(STRING_IN_REGEXP, catom[0], rcurr);
920 reiChildren.push_back(mem);
921 reiChildrenOrig.push_back(c);
922 }
923 // go back and justify each premise
924 bool successChildren = true;
925 for (size_t i = 0, nchild = reiChildren.size(); i < nchild; i++)
926 {
927 if (!psb.applyPredTransform(reiChildrenOrig[i], reiChildren[i], reiExp))
928 {
929 Trace("strings-ipc-re")
930 << "... failed to justify child " << reiChildren[i] << " from "
931 << reiChildrenOrig[i] << std::endl;
932 successChildren = false;
933 break;
934 }
935 }
936 if (!successChildren)
937 {
938 break;
939 }
940 Node mem = psb.tryStep(PfRule::RE_INTER, reiChildren, {});
941 Trace("strings-ipc-re")
942 << "Regular expression summary: " << mem << std::endl;
943 // the conclusion is rewritable to the premises via rewriting?
944 if (psb.applyPredTransform(mem, conc, {}))
945 {
946 Trace("strings-ipc-re") << "... success!" << std::endl;
947 useBuffer = true;
948 }
949 else
950 {
951 Trace("strings-ipc-re")
952 << "...failed to rewrite to conclusion" << std::endl;
953 }
954 }
955 break;
956 // ========================== unknown and currently unsupported
957 case InferenceId::STRINGS_CARDINALITY:
958 case InferenceId::STRINGS_I_CYCLE_E:
959 case InferenceId::STRINGS_I_CYCLE:
960 case InferenceId::STRINGS_INFER_EMP:
961 case InferenceId::STRINGS_RE_DELTA:
962 case InferenceId::STRINGS_RE_DELTA_CONF:
963 case InferenceId::STRINGS_RE_DERIVE:
964 case InferenceId::STRINGS_FLOOP:
965 case InferenceId::STRINGS_FLOOP_CONFLICT:
966 case InferenceId::STRINGS_DEQ_NORM_EMP:
967 case InferenceId::STRINGS_CTN_TRANS:
968 case InferenceId::STRINGS_CTN_DECOMPOSE:
969 default:
970 // do nothing, these will be converted to THEORY_INFERENCE below since the
971 // rule is unknown.
972 break;
973 }
974
975 // now see if we would succeed with the checker-to-try
976 bool success = false;
977 if (ps.d_rule != PfRule::UNKNOWN)
978 {
979 Trace("strings-ipc") << "For " << infer << ", try proof rule " << ps.d_rule
980 << "...";
981 Assert(ps.d_rule != PfRule::UNKNOWN);
982 Node pconc = psb.tryStep(ps.d_rule, ps.d_children, ps.d_args);
983 if (pconc.isNull() || pconc != conc)
984 {
985 Trace("strings-ipc") << "failed, pconc is " << pconc << " (expected "
986 << conc << ")" << std::endl;
987 ps.d_rule = PfRule::UNKNOWN;
988 }
989 else
990 {
991 // successfully set up a single step proof in ps
992 success = true;
993 Trace("strings-ipc") << "success!" << std::endl;
994 }
995 }
996 else if (useBuffer)
997 {
998 // successfully set up a multi step proof in psb
999 success = true;
1000 }
1001 else
1002 {
1003 Trace("strings-ipc") << "For " << infer << " " << conc
1004 << ", no proof rule, failed" << std::endl;
1005 }
1006 if (!success)
1007 {
1008 // debug print
1009 if (Trace.isOn("strings-ipc-fail"))
1010 {
1011 Trace("strings-ipc-fail")
1012 << "InferProofCons::convert: Failed " << infer
1013 << (isRev ? " :rev " : " ") << conc << std::endl;
1014 for (const Node& ec : exp)
1015 {
1016 Trace("strings-ipc-fail") << " e: " << ec << std::endl;
1017 }
1018 }
1019 // untrustworthy conversion, the argument of THEORY_INFERENCE is its
1020 // conclusion
1021 ps.d_args.clear();
1022 ps.d_args.push_back(conc);
1023 Node t = builtin::BuiltinProofRuleChecker::mkTheoryIdNode(THEORY_STRINGS);
1024 ps.d_args.push_back(t);
1025 // use the trust rule
1026 ps.d_rule = PfRule::THEORY_INFERENCE;
1027 }
1028 if (Trace.isOn("strings-ipc-debug"))
1029 {
1030 if (useBuffer)
1031 {
1032 Trace("strings-ipc-debug")
1033 << "InferProofCons::convert returned buffer with "
1034 << psb.getNumSteps() << " steps:" << std::endl;
1035 const std::vector<std::pair<Node, ProofStep>>& steps = psb.getSteps();
1036 for (const std::pair<Node, ProofStep>& step : steps)
1037 {
1038 Trace("strings-ipc-debug")
1039 << "- " << step.first << " via " << step.second << std::endl;
1040 }
1041 }
1042 else
1043 {
1044 Trace("strings-ipc-debug")
1045 << "InferProofCons::convert returned " << ps << std::endl;
1046 }
1047 }
1048 }
1049
1050 bool InferProofCons::convertLengthPf(Node lenReq,
1051 const std::vector<Node>& lenExp,
1052 TheoryProofStepBuffer& psb)
1053 {
1054 for (const Node& le : lenExp)
1055 {
1056 if (lenReq == le)
1057 {
1058 return true;
1059 }
1060 }
1061 Trace("strings-ipc-len") << "Must explain " << lenReq << " by " << lenExp
1062 << std::endl;
1063 for (const Node& le : lenExp)
1064 {
1065 // probably rewrites to it?
1066 std::vector<Node> exp;
1067 if (psb.applyPredTransform(le, lenReq, exp))
1068 {
1069 Trace("strings-ipc-len") << "...success by rewrite" << std::endl;
1070 return true;
1071 }
1072 // maybe x != "" => len(x) != 0
1073 std::vector<Node> children;
1074 children.push_back(le);
1075 std::vector<Node> args;
1076 Node res = psb.tryStep(PfRule::STRING_LENGTH_NON_EMPTY, children, args);
1077 if (res == lenReq)
1078 {
1079 Trace("strings-ipc-len") << "...success by LENGTH_NON_EMPTY" << std::endl;
1080 return true;
1081 }
1082 }
1083 Trace("strings-ipc-len") << "...failed" << std::endl;
1084 return false;
1085 }
1086
1087 Node InferProofCons::convertTrans(Node eqa,
1088 Node eqb,
1089 TheoryProofStepBuffer& psb)
1090 {
1091 if (eqa.getKind() != EQUAL || eqb.getKind() != EQUAL)
1092 {
1093 return Node::null();
1094 }
1095 for (uint32_t i = 0; i < 2; i++)
1096 {
1097 Node eqaSym = i == 0 ? eqa[1].eqNode(eqa[0]) : eqa;
1098 for (uint32_t j = 0; j < 2; j++)
1099 {
1100 Node eqbSym = j == 0 ? eqb : eqb[1].eqNode(eqb[1]);
1101 if (eqa[i] == eqb[j])
1102 {
1103 std::vector<Node> children;
1104 children.push_back(eqaSym);
1105 children.push_back(eqbSym);
1106 return psb.tryStep(PfRule::TRANS, children, {});
1107 }
1108 }
1109 }
1110 return Node::null();
1111 }
1112
1113 std::shared_ptr<ProofNode> InferProofCons::getProofFor(Node fact)
1114 {
1115 // get the inference
1116 NodeInferInfoMap::iterator it = d_lazyFactMap.find(fact);
1117 if (it == d_lazyFactMap.end())
1118 {
1119 Node factSym = CDProof::getSymmFact(fact);
1120 if (!factSym.isNull())
1121 {
1122 // Use the symmetric fact. There is no need to explictly make a
1123 // SYMM proof, as this is handled by CDProof::getProofFor below.
1124 it = d_lazyFactMap.find(factSym);
1125 }
1126 }
1127 AlwaysAssert(it != d_lazyFactMap.end());
1128 std::shared_ptr<InferInfo> ii = (*it).second;
1129 Assert(ii->d_conc == fact);
1130 // make a placeholder proof using STRINGS_INFERENCE, which is reconstructed
1131 // during post-process
1132 CDProof pf(d_pnm);
1133 std::vector<Node> args;
1134 packArgs(ii->d_conc, ii->getId(), ii->d_idRev, ii->d_premises, args);
1135 // must flatten
1136 std::vector<Node> exp;
1137 for (const Node& ec : ii->d_premises)
1138 {
1139 utils::flattenOp(AND, ec, exp);
1140 }
1141 pf.addStep(fact, PfRule::STRING_INFERENCE, exp, args);
1142 return pf.getProofFor(fact);
1143 }
1144
1145 std::string InferProofCons::identify() const
1146 {
1147 return "strings::InferProofCons";
1148 }
1149
1150 bool InferProofCons::purifyCoreSubstitution(Node& tgt,
1151 std::vector<Node>& children,
1152 TheoryProofStepBuffer& psb,
1153 bool concludeTgtNew)
1154 {
1155 // collect the terms to purify, which are the LHS of the substitution
1156 std::unordered_set<Node> termsToPurify;
1157 for (const Node& nc : children)
1158 {
1159 Assert(nc.getKind() == EQUAL && nc[0].getType().isStringLike());
1160 termsToPurify.insert(nc[0]);
1161 }
1162 // now, purify each of the children of the substitution
1163 for (size_t i = 0, nchild = children.size(); i < nchild; i++)
1164 {
1165 Node pnc = purifyCorePredicate(children[i], true, psb, termsToPurify);
1166 if (pnc.isNull())
1167 {
1168 return false;
1169 }
1170 if (children[i] != pnc)
1171 {
1172 Trace("strings-ipc-pure-subs")
1173 << "Converted: " << children[i] << " to " << pnc << std::endl;
1174 children[i] = pnc;
1175 }
1176 // we now should have a substitution with only atomic terms
1177 Assert(children[i][0].getNumChildren() == 0);
1178 }
1179 // now, purify the target predicate
1180 tgt = purifyCorePredicate(tgt, concludeTgtNew, psb, termsToPurify);
1181 return !tgt.isNull();
1182 }
1183
1184 Node InferProofCons::purifyCorePredicate(
1185 Node lit,
1186 bool concludeNew,
1187 TheoryProofStepBuffer& psb,
1188 std::unordered_set<Node>& termsToPurify)
1189 {
1190 bool pol = lit.getKind() != NOT;
1191 Node atom = pol ? lit : lit[0];
1192 if (atom.getKind() != EQUAL || !atom[0].getType().isStringLike())
1193 {
1194 // we only purify string (dis)equalities
1195 return lit;
1196 }
1197 // purify both sides of the equality
1198 std::vector<Node> pcs;
1199 bool childChanged = false;
1200 for (const Node& lc : atom)
1201 {
1202 Node plc = purifyCoreTerm(lc, termsToPurify);
1203 childChanged = childChanged || plc != lc;
1204 pcs.push_back(plc);
1205 }
1206 if (!childChanged)
1207 {
1208 return lit;
1209 }
1210 NodeManager* nm = NodeManager::currentNM();
1211 Node newLit = nm->mkNode(EQUAL, pcs);
1212 if (!pol)
1213 {
1214 newLit = newLit.notNode();
1215 }
1216 Assert(lit != newLit);
1217 // prove by transformation, should always succeed
1218 if (!psb.applyPredTransform(
1219 concludeNew ? lit : newLit, concludeNew ? newLit : lit, {}))
1220 {
1221 // failed, return null
1222 return Node::null();
1223 }
1224 return newLit;
1225 }
1226
1227 Node InferProofCons::purifyCoreTerm(Node n,
1228 std::unordered_set<Node>& termsToPurify)
1229 {
1230 Assert(n.getType().isStringLike());
1231 if (n.getNumChildren() == 0)
1232 {
1233 return n;
1234 }
1235 NodeManager* nm = NodeManager::currentNM();
1236 if (n.getKind() == STRING_CONCAT)
1237 {
1238 std::vector<Node> pcs;
1239 for (const Node& nc : n)
1240 {
1241 pcs.push_back(purifyCoreTerm(nc, termsToPurify));
1242 }
1243 return nm->mkNode(STRING_CONCAT, pcs);
1244 }
1245 if (termsToPurify.find(n) == termsToPurify.end())
1246 {
1247 // did not need to purify
1248 return n;
1249 }
1250 SkolemManager* sm = nm->getSkolemManager();
1251 Node k = sm->mkPurifySkolem(n, "k");
1252 return k;
1253 }
1254
1255 } // namespace strings
1256 } // namespace theory
1257 } // namespace cvc5