Towards theory of sequences.
This makes the strings solver handle seq.unit, which requires two new inferences and updates to its model construction.
It also fixes a bug in the best content heuristic, which previously failed to update the best score.
#include "theory/strings/base_solver.h"
+#include "expr/sequence.h"
#include "options/strings_options.h"
#include "theory/strings/theory_strings_utils.h"
#include "theory/strings/word.h"
if (!tn.isRegExp())
{
Node emps;
- if (tn.isString())
+ if (tn.isStringLike())
{
d_stringsEqc.push_back(eqc);
emps = Word::mkEmptyWord(tn);
while (!eqc_i.isFinished())
{
Node n = *eqc_i;
- if (n.isConst())
+ Kind k = n.getKind();
+ // process constant-like terms
+ if (utils::isConstantLike(n))
{
- d_eqcInfo[eqc].d_bestContent = n;
- d_eqcInfo[eqc].d_base = n;
- d_eqcInfo[eqc].d_exp = Node::null();
+ Node prev = d_eqcInfo[eqc].d_bestContent;
+ if (!prev.isNull())
+ {
+ // we have either (seq.unit x) = C, or (seq.unit x) = (seq.unit y)
+ // where C is a sequence constant.
+ Node cval =
+ prev.isConst() ? prev : (n.isConst() ? n : Node::null());
+ std::vector<Node> exp;
+ exp.push_back(prev.eqNode(n));
+ Node s, t;
+ if (cval.isNull())
+ {
+ // injectivity of seq.unit
+ s = prev[0];
+ t = n[0];
+ }
+ else
+ {
+ // should not have two constants in the same equivalence class
+ Assert(cval.getType().isSequence());
+ std::vector<Node> cchars = Word::getChars(cval);
+ if (cchars.size() == 1)
+ {
+ Node oval = prev.isConst() ? n : prev;
+ Assert(oval.getKind() == SEQ_UNIT);
+ s = oval[0];
+ t = cchars[0]
+ .getConst<ExprSequence>()
+ .getSequence()
+ .getVec()[0];
+ // oval is congruent (ignored) in this context
+ d_congruent.insert(oval);
+ }
+ else
+ {
+ // (seq.unit x) = C => false if |C| != 1.
+ d_im.sendInference(
+ exp, d_false, Inference::UNIT_CONST_CONFLICT);
+ return;
+ }
+ }
+ if (!d_state.areEqual(s, t))
+ {
+ // (seq.unit x) = (seq.unit y) => x=y, or
+ // (seq.unit x) = (seq.unit c) => x=c
+ Assert(s.getType() == t.getType());
+ d_im.sendInference(exp, s.eqNode(t), Inference::UNIT_INJ);
+ }
+ }
+ // update best content
+ if (prev.isNull() || n.isConst())
+ {
+ d_eqcInfo[eqc].d_bestContent = n;
+ d_eqcInfo[eqc].d_bestScore = 0;
+ d_eqcInfo[eqc].d_base = n;
+ d_eqcInfo[eqc].d_exp = Node::null();
+ }
}
- else if (tn.isInteger())
+ if (tn.isInteger())
{
// do nothing
}
+ // process indexing
else if (n.getNumChildren() > 0)
{
- Kind k = n.getKind();
if (k != EQUAL)
{
if (d_congruent.find(n) == d_congruent.end())
{
// check if we have inferred a new equality by removal of empty
// components
- if (n.getKind() == STRING_CONCAT && !d_state.areEqual(nc, n))
+ if (k == STRING_CONCAT && !d_state.areEqual(nc, n))
{
std::vector<Node> exp;
size_t count[2] = {0, 0};
}
}
}
- else
+ else if (!n.isConst())
{
if (d_congruent.find(n) == d_congruent.end())
{
Node nct = utils::mkNConcat(vecnc, n.getType());
Assert(!nct.isConst());
bei.d_bestContent = nct;
+ bei.d_bestScore = contentSize;
bei.d_base = n;
if (!exp.empty())
{
std::vector<std::vector<Node> >& cols,
std::vector<Node>& lts)
{
+ Trace("strings-card") << "Check cardinality (type " << tn << ")..."
+ << std::endl;
NodeManager* nm = NodeManager::currentNM();
- Trace("strings-card") << "Check cardinality...." << std::endl;
+ uint32_t typeCardSize;
+ if (tn.isString()) // string-only
+ {
+ typeCardSize = d_cardSize;
+ }
+ else
+ {
+ Assert(tn.isSequence());
+ TypeNode etn = tn.getSequenceElementType();
+ if (etn.isInterpretedFinite())
+ {
+ // infinite cardinality, we are fine
+ return;
+ }
+ // TODO (cvc4-projects #23): how to handle sequence for finite types?
+ return;
+ }
// for each collection
for (unsigned i = 0, csize = cols.size(); i < csize; ++i)
{
// no restriction on sets in the partition of size 1
continue;
}
- if (!tn.isString())
- {
- // TODO (cvc4-projects #23): how to handle sequence for finite types?
- continue;
- }
// size > c^k
unsigned card_need = 1;
double curr = static_cast<double>(cols[i].size());
- while (curr > d_cardSize)
+ while (curr > typeCardSize)
{
- curr = curr / static_cast<double>(d_cardSize);
+ curr = curr / static_cast<double>(typeCardSize);
card_need++;
}
Trace("strings-card")
<< "Need length " << card_need
- << " for this number of strings (where alphabet size is " << d_cardSize
- << ")." << std::endl;
+ << " for this number of strings (where alphabet size is "
+ << typeCardSize << ") given type " << tn << "." << std::endl;
// check if we need to split
bool needsSplit = true;
if (lr.isConst())
while( !eqc_i.isFinished() ){
Node n = (*eqc_i);
if( !d_bsolver.isCongruent(n) ){
- if (n.isConst() || n.getKind() == STRING_CONCAT)
+ Kind nk = n.getKind();
+ bool isCLike = utils::isConstantLike(n);
+ if (isCLike || nk == STRING_CONCAT)
{
Trace("strings-process-debug") << "Get Normal Form : Process term " << n << " in eqc " << eqc << std::endl;
NormalForm nf_curr;
- if (n.isConst())
+ if (isCLike)
{
nf_curr.init(n);
}
- else if (n.getKind() == STRING_CONCAT)
+ else if (nk == STRING_CONCAT)
{
// set the base to n, we construct the other portions of nf_curr in
// the following.
}
//if not equal to self
std::vector<Node>& currv = nf_curr.d_nf;
- if (currv.size() > 1 || (currv.size() == 1 && currv[0].isConst()))
+ if (currv.size() > 1
+ || (currv.size() == 1 && utils::isConstantLike(currv[0])))
{
// if in a build with assertions, check that normal form is acyclic
if (Configuration::isAssertionBuild())
d_extt.addFunctionKind(kind::STRING_TOLOWER);
d_extt.addFunctionKind(kind::STRING_TOUPPER);
d_extt.addFunctionKind(kind::STRING_REV);
+ d_extt.addFunctionKind(kind::SEQ_UNIT);
d_true = NodeManager::currentNM()->mkConst(true);
d_false = NodeManager::currentNM()->mkConst(false);
}
}
}
- else
+ else if (k == STRING_SUBSTR)
{
- if (k == STRING_SUBSTR)
- {
- r_effort = 1;
- }
- else if (k != STRING_IN_REGEXP)
- {
- r_effort = 2;
- }
+ r_effort = 1;
+ }
+ else if (k == SEQ_UNIT)
+ {
+ // never necessary to reduce seq.unit
+ return false;
+ }
+ else if (k != STRING_IN_REGEXP)
+ {
+ r_effort = 2;
}
if (effort != r_effort)
{
case Inference::I_CONST_MERGE: return "I_CONST_MERGE";
case Inference::I_CONST_CONFLICT: return "I_CONST_CONFLICT";
case Inference::I_NORM: return "I_NORM";
+ case Inference::UNIT_INJ: return "UNIT_INJ";
+ case Inference::UNIT_CONST_CONFLICT: return "UNIT_CONST_CONFLICT";
case Inference::CARD_SP: return "CARD_SP";
case Inference::CARDINALITY: return "CARDINALITY";
case Inference::I_CYCLE_E: return "I_CYCLE_E";
// equal after e.g. removing strings that are currently empty. For example:
// y = "" ^ z = "" => x ++ y = z ++ x
I_NORM,
+ // injectivity of seq.unit
+ UNIT_INJ,
+ // unit constant conflict
+ UNIT_CONST_CONFLICT,
// A split due to cardinality
CARD_SP,
// The cardinality inference for strings, see Liang et al CAV 2014.
case Rewrite::LEN_CONCAT: return "LEN_CONCAT";
case Rewrite::LEN_REPL_INV: return "LEN_REPL_INV";
case Rewrite::LEN_CONV_INV: return "LEN_CONV_INV";
+ case Rewrite::LEN_SEQ_UNIT: return "LEN_SEQ_UNIT";
case Rewrite::CHARAT_ELIM: return "CHARAT_ELIM";
case Rewrite::SEQ_UNIT_EVAL: return "SEQ_UNIT_EVAL";
default: return "?";
LEN_CONCAT,
LEN_REPL_INV,
LEN_CONV_INV,
+ LEN_SEQ_UNIT,
CHARAT_ELIM,
SEQ_UNIT_EVAL
};
Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV);
}
+ else if (nk0 == SEQ_UNIT)
+ {
+ Node retNode = nm->mkConst(Rational(1));
+ return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT);
+ }
return node;
}
nb << emp.eqNode(t);
for (const Node& c : vec)
{
+ Assert(c.getType() == t.getType());
nb << c.eqNode(t);
}
Assert(ratLen.getDenominator() == 1);
Integer intLen = ratLen.getNumerator();
uint32_t u = intLen.getUnsignedInt();
- if (stype.isString())
+ if (stype.isString()) // string-only
{
res = nm->mkConst(String(std::string(u, 'A')));
}
{
std::vector<Expr> seq;
seq.push_back(node[0].toExpr());
- TypeNode stype = nm->mkSequenceType(node[0].getType());
+ TypeNode stype = node[0].getType();
Node ret = nm->mkConst(ExprSequence(stype.toType(), seq));
return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL);
}
NormalForm& nfe = d_csolver->getNormalForm(eqc);
if (nfe.d_nf.size() == 1)
{
+ // is it an equivalence class with a seq.unit term?
+ if (nfe.d_nf[0].getKind() == SEQ_UNIT)
+ {
+ pure_eq_assign[eqc] = nfe.d_nf[0];
+ Trace("strings-model") << "(unit: " << nfe.d_nf[0] << ") ";
+ }
// does it have a code and the length of these equivalence classes are
// one?
- if (d_termReg.hasStringCode() && lts_values[i] == d_one)
+ else if (d_termReg.hasStringCode() && lts_values[i] == d_one)
{
EqcInfo* eip = d_state.getOrMakeEqcInfo(eqc, false);
if (eip && !eip->d_codeTerm.get().isNull())
std::unique_ptr<SEnumLen> sel;
Trace("strings-model") << "Cardinality of alphabet is "
<< utils::getAlphabetCardinality() << std::endl;
- if (tn.isString())
+ if (tn.isString()) // string-only
{
sel.reset(new StringEnumLen(
currLen, currLen, utils::getAlphabetCardinality()));
nc.push_back(r.isConst() ? r : processed[r]);
}
Node cc = utils::mkNConcat(nc, tn);
- Assert(cc.isConst());
Trace("strings-model")
<< "*** Determined constant " << cc << " for " << rn << std::endl;
processed[rn] = cc;
{
// this should never happen due to the model soundness argument
// for strings
-
Unreachable() << "TheoryStrings::collectModelInfoType: "
"Inconsistent equality (unprocessed eqc)"
<< std::endl;
return false;
}
+ else if (!cc.isConst())
+ {
+ // the value may be specified by seq.unit components, ensure this
+ // is marked as the skeleton for constructing values in this class.
+ m->assertSkeleton(cc);
+ }
}
}
//Trace("strings-model") << "String Model : Assigned." << std::endl;
allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
}
+bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; }
+
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
{
size_t i = start;
*/
std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x);
+/**
+ * Return if a string-like term n is "constant-like", that is, either a
+ * constant string/sequence, or an application of seq.unit.
+ *
+ * @param n The string-like term
+ * @return true if n is constant-like.
+ */
+bool isConstantLike(Node n);
+
/**
* Given a vector of regular expression nodes and a start index that points to
* a wildcard, returns true if the wildcard is unbounded (i.e. it is followed