This updates our string reductions for `to_lower`, `to_upper`, `from_int`, `to_int` to use STRING_NTH instead of STRING_TO_CODE applied to STRING_SUBSTR.
It optionally replaces STRING_TO_CODE with STRING_NTH during preprocessing (true by default).
This is work towards efficient support for `to_lower`/`to_upper`.
type = "uint64_t"
default = "65536"
help = "The maximum size of string values in models"
+
+[[option]]
+ name = "stringsCodeElim"
+ category = "expert"
+ long = "strings-code-elim"
+ type = "bool"
+ default = "true"
+ help = "eliminate code points during preprocessing"
size_t len = Word::getLength(s);
ret = nm->mkConstInt(Rational(len));
}
- else if (sk == SEQ_UNIT)
+ else if (sk == SEQ_UNIT || sk == STRING_UNIT)
{
ret = nm->mkConstInt(1);
}
TypeNode intType = nm->integerType();
Node k = sc->mkSkolemFun(SkolemFunId::STRINGS_DEQ_DIFF, intType, n1, n2);
Node deq = eq.negate();
- Node ss1, ss2;
- if (n1.getType().isString())
- {
- // substring of length 1
- ss1 = nm->mkNode(STRING_SUBSTR, n1, k, d_one);
- ss2 = nm->mkNode(STRING_SUBSTR, n2, k, d_one);
- }
- else
- {
- // as an optimization, for sequences, use seq.nth
- ss1 = nm->mkNode(SEQ_NTH, n1, k);
- ss2 = nm->mkNode(SEQ_NTH, n2, k);
- }
+ // use seq.nth instead of substr
+ Node ss1 = nm->mkNode(SEQ_NTH, n1, k);
+ Node ss2 = nm->mkNode(SEQ_NTH, n2, k);
// disequality between nth/substr
Node conc1 = ss1.eqNode(ss2).negate();
Node code_range = mkCodeRange(t, alphaCard);
lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
}
+ else if (tk == SEQ_NTH)
+ {
+ if (t[0].getType().isString())
+ {
+ Node s = t[0];
+ Node n = t[1];
+ // start point is greater than or equal zero
+ Node c1 = nm->mkNode(GEQ, n, nm->mkConstInt(0));
+ // start point is less than end of string
+ Node c2 = nm->mkNode(GT, nm->mkNode(STRING_LENGTH, s), n);
+ // check whether this application of seq.nth is defined.
+ Node cond = nm->mkNode(AND, c1, c2);
+ Node code_range = mkCodeRange(t, alphaCard);
+ // the lemma for `seq.nth`
+ lemma = nm->mkNode(ITE, cond, code_range, t.eqNode(nm->mkConstInt(Rational(-1))));
+ // IF: n >=0 AND n < len( s )
+ // THEN: 0 <= (seq.nth s n) < |A|
+ // ELSE: (seq.nth s n) = -1
+ }
+ }
else if (tk == STRING_INDEXOF || tk == STRING_INDEXOF_RE)
{
// (and
d_stringsFmf(env, valuation, d_termReg),
d_strat(d_env),
d_absModelCounter(0),
+ d_strGapModelCounter(0),
d_cpacb(*this)
{
d_termReg.finishInit(&d_im);
d_equalityEngine->addFunctionKind(kind::STRING_IN_REGEXP, eagerEval);
d_equalityEngine->addFunctionKind(kind::STRING_TO_CODE, eagerEval);
d_equalityEngine->addFunctionKind(kind::SEQ_UNIT, eagerEval);
+ d_equalityEngine->addFunctionKind(kind::STRING_UNIT, false);
// `seq.nth` is not always defined, and so we do not evaluate it eagerly.
d_equalityEngine->addFunctionKind(kind::SEQ_NTH, false);
// extended functions
// memberships are not relevant for model building
d_valuation.setIrrelevantKind(kind::STRING_IN_REGEXP);
+ d_valuation.setIrrelevantKind(kind::STRING_LEQ);
// seq nth doesn't always evaluate
d_valuation.setUnevaluatedKind(SEQ_NTH);
}
bool TheoryStrings::collectModelValues(TheoryModel* m,
const std::set<Node>& termSet)
{
+ d_absModelCounter = 0;
+ d_strGapModelCounter = 0;
if (TraceIsOn("strings-debug-model"))
{
Trace("strings-debug-model")
}
// is it an equivalence class with a seq.unit term?
Node assignedValue;
- if (nfe.d_nf[0].getKind() == SEQ_UNIT)
+ if (nfe.d_nf[0].getKind() == SEQ_UNIT
+ || nfe.d_nf[0].getKind() == STRING_UNIT)
{
- Node argVal;
if (nfe.d_nf[0][0].getType().isStringLike())
{
// By this point, we should have assigned model values for the
// elements of this sequence type because of the check in the
// beginning of this method
- argVal = m->getRepresentative(nfe.d_nf[0][0]);
+ Node argVal = m->getRepresentative(nfe.d_nf[0][0]);
+ Assert(nfe.d_nf[0].getKind() == SEQ_UNIT);
+ assignedValue = utils::mkUnit(eqc.getType(), argVal);
}
else
{
// value of this term, since it might not be available yet, as
// it may belong to a theory that has not built its model yet.
// Hence, we assign a (non-constant) skeleton (seq.unit argVal).
- argVal = nfe.d_nf[0][0];
+ assignedValue = nfe.d_nf[0];
}
- Assert(!argVal.isNull()) << "No value for " << nfe.d_nf[0][0];
- assignedValue = rewrite(nm->mkNode(SEQ_UNIT, argVal));
+ assignedValue = rewrite(assignedValue);
Trace("strings-model")
<< "-> assign via seq.unit: " << assignedValue << std::endl;
}
}
else if (options().strings.seqArray != options::SeqArrayMode::NONE)
{
- TypeNode etype = eqc.getType().getSequenceElementType();
+ TypeNode eqcType = eqc.getType();
// determine skeleton based on the write model, if it exists
const std::map<Node, Node>& writeModel = d_asolver.getWriteModel(eqc);
Trace("strings-model")
continue;
}
usedWrites.insert(ivalue);
- Node wsunit = nm->mkNode(SEQ_UNIT, w.second);
+ Node wsunit = utils::mkUnit(eqcType, w.second);
writes.emplace_back(ivalue, wsunit);
}
// sort based on index value
}
if (nextIndex > currIndex)
{
+ Trace("strings-model") << "Make skeleton from " << currIndex
+ << " ... " << nextIndex << std::endl;
// allocate arbitrary value to fill gap
Assert(conSeq != nullptr);
Node base = eqc;
NodeManager* nm = NodeManager::currentNM();
SkolemManager* sm = nm->getSkolemManager();
BoundVarManager* bvm = nm->getBoundVarManager();
+ TypeNode tn = c.getType();
+ Assert(tn.isSequence());
Assert(c.getKind() == CONST_SEQUENCE);
const Sequence& sn = c.getConst<Sequence>();
const std::vector<Node>& snvec = sn.getVec();
std::vector<Node> skChildren;
- TypeNode etn = c.getType().getSequenceElementType();
+ TypeNode etn = tn.getSequenceElementType();
for (const Node& snv : snvec)
{
Assert(snv.getType() == etn);
Node v = bvm->mkBoundVar<SeqModelVarAttribute>(snv, etn);
// use a skolem, not a bound variable
Node kv = sm->mkPurifySkolem(v, "smv");
- skChildren.push_back(nm->mkNode(SEQ_UNIT, kv));
+ skChildren.push_back(utils::mkUnit(tn, kv));
}
return utils::mkConcat(skChildren, c.getType());
}
size_t currIndex,
size_t nextIndex)
{
+ Assert(nextIndex > currIndex);
Assert(!r.isNull());
- Assert(r.getType().isSequence());
NodeManager* nm = NodeManager::currentNM();
SkolemManager* sm = nm->getSkolemManager();
- std::vector<Node> cacheVals;
- cacheVals.push_back(r);
+ TypeNode tn = r.getType();
std::vector<Node> skChildren;
- TypeNode etn = r.getType().getSequenceElementType();
- for (size_t i = currIndex; i < nextIndex; i++)
+ if (tn.isSequence())
{
- cacheVals.push_back(nm->mkConstInt(Rational(currIndex)));
- Node kv = sm->mkSkolemFunction(
- SkolemFunId::SEQ_MODEL_BASE_ELEMENT, etn, cacheVals);
- skChildren.push_back(nm->mkNode(SEQ_UNIT, kv));
- cacheVals.pop_back();
+ std::vector<Node> cacheVals;
+ cacheVals.push_back(r);
+ TypeNode etn = tn.getSequenceElementType();
+ for (size_t i = currIndex; i < nextIndex; i++)
+ {
+ cacheVals.push_back(nm->mkConstInt(Rational(currIndex)));
+ Node kv = sm->mkSkolemFunction(
+ SkolemFunId::SEQ_MODEL_BASE_ELEMENT, etn, cacheVals);
+ skChildren.push_back(utils::mkUnit(tn, kv));
+ cacheVals.pop_back();
+ }
}
- return utils::mkConcat(skChildren, r.getType());
+ else
+ {
+ // allocate a unique symbolic (unspecified) string of length one, and
+ // repeat it (nextIndex-currIndex) times.
+ // Notice that this is guaranteed to be a unique (unspecified) character,
+ // since the only existing str.unit terms originate from our reductions,
+ // and hence are only applied to non-negative arguments. If the user
+ // was able to give arbitrary constraints over str.unit terms, then this
+ // construction would require a character not used in the model value of
+ // any other string.
+ d_strGapModelCounter++;
+ Node symChar =
+ utils::mkUnit(tn, nm->mkConstInt(-Rational(d_strGapModelCounter)));
+ skChildren.resize(nextIndex - currIndex, symChar);
+ }
+ return utils::mkConcat(skChildren, tn);
}
/////////////////////////////////////////////////////////////////////////////
TrustNode TheoryStrings::ppRewrite(TNode atom, std::vector<SkolemLemma>& lems)
{
Trace("strings-ppr") << "TheoryStrings::ppRewrite " << atom << std::endl;
- if (atom.getKind() == EQUAL)
+ Kind ak = atom.getKind();
+ if (ak == EQUAL)
{
// always apply aggressive equality rewrites here
Node ret = d_rewriter.rewriteEqualityExt(atom);
return TrustNode::mkTrustRewrite(atom, ret, nullptr);
}
}
- if (atom.getKind() == STRING_FROM_CODE)
+ if (ak == STRING_FROM_CODE)
{
// str.from_code(t) ---> ite(0 <= t < |A|, t = str.to_code(k), k = "")
NodeManager* nm = NodeManager::currentNM();
lems.push_back(SkolemLemma(tnk, k));
return TrustNode::mkTrustRewrite(atom, k, nullptr);
}
+ else if (ak == STRING_TO_CODE && options().strings.stringsCodeElim)
+ {
+ // str.to_code(t) ---> ite(str.len(t) = 1, str.nth(t,0), -1)
+ NodeManager* nm = NodeManager::currentNM();
+ Node t = atom[0];
+ Node cond = nm->mkNode(EQUAL, nm->mkNode(STRING_LENGTH, t), d_one);
+ Node ret = nm->mkNode(ITE, cond, nm->mkNode(SEQ_NTH, t, d_zero), d_neg_one);
+ return TrustNode::mkTrustRewrite(atom, ret, nullptr);
+ }
+
TrustNode ret;
Node atomRet = atom;
if (options().strings.regExpElim != options::RegExpElimMode::OFF
- && atom.getKind() == STRING_IN_REGEXP)
+ && ak == STRING_IN_REGEXP)
{
// aggressive elimination of regular expression membership
ret = d_regexp_elim.eliminateTrusted(atomRet);
* we have built, so that unique debug names can be assigned.
*/
size_t d_absModelCounter;
+ /**
+ * For model building, a counter on the number of gaps constructed for
+ * string terms due to array reasoning. This is to allocate unique unspecified
+ * characters.
+ */
+ size_t d_strGapModelCounter;
/** The care pair argument callback, used for theory combination */
CarePairArgumentCallback d_cpacb;
};/* class TheoryStrings */
Node xPlusOne = nm->mkNode(ADD, x, one);
Node xbv = nm->mkNode(BOUND_VAR_LIST, x);
Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, leni));
- Node sx = nm->mkNode(STRING_SUBSTR, itost, x, one);
Node ux = nm->mkNode(APPLY_UF, u, x);
Node ux1 = nm->mkNode(APPLY_UF, u, xPlusOne);
Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0")));
- Node c = nm->mkNode(SUB, nm->mkNode(STRING_TO_CODE, sx), c0);
+ Node c = nm->mkNode(SUB, mkCodePointAtIndex(itost, x), c0);
Node ten = nm->mkConstInt(Rational(10));
Node eq = ux1.eqNode(nm->mkNode(ADD, c, nm->mkNode(MULT, ten, ux)));
Node kc1 = nm->mkNode(GEQ, k, zero);
Node kc2 = nm->mkNode(LT, k, lens);
Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0")));
- Node codeSk = nm->mkNode(
- SUB,
- nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, s, k, one)),
- c0);
+ Node codeSk = nm->mkNode(SUB, mkCodePointAtIndex(s, k), c0);
Node ten = nm->mkConstInt(Rational(10));
Node kc3 = nm->mkNode(
OR, nm->mkNode(LT, codeSk, zero), nm->mkNode(GEQ, codeSk, ten));
Node x = SkolemCache::mkIndexVar(t);
Node xbv = nm->mkNode(BOUND_VAR_LIST, x);
Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, lens));
- Node sx = nm->mkNode(STRING_SUBSTR, s, x, one);
Node ux = nm->mkNode(APPLY_UF, u, x);
Node ux1 = nm->mkNode(APPLY_UF, u, nm->mkNode(ADD, x, one));
- Node c = nm->mkNode(SUB, nm->mkNode(STRING_TO_CODE, sx), c0);
+ Node c = nm->mkNode(SUB, mkCodePointAtIndex(s, x), c0);
Node eq = ux1.eqNode(nm->mkNode(ADD, c, nm->mkNode(MULT, ten, ux)));
Node cb = nm->mkNode(AND, nm->mkNode(GEQ, c, zero), nm->mkNode(LT, c, ten));
// nodes for the case where `seq.nth` is defined.
Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre");
Node sk2 = sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr");
- Node unit = nm->mkNode(SEQ_UNIT, skt);
+ Node unit = utils::mkUnit(s.getType(), skt);
Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, unit, sk2));
// length of first skolem is second argument
Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n);
// n >=0 AND n < len( s )
// IMPLIES: s = sk1 ++ unit(skt) ++ sk2 AND
// len( sk1 ) = n AND
- // ( len( sk2 ) = len( s )- (n+1)
+ // len( sk2 ) = len( s )- (n+1)
asserts.push_back(lemma);
retNode = skt;
}
Node i = SkolemCache::mkIndexVar(t);
Node bvi = nm->mkNode(BOUND_VAR_LIST, i);
- Node ci = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, x, i, one));
- Node ri = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, r, i, one));
+ Node ci = mkCodePointAtIndex(x, i);
+ Node ri = mkCodePointAtIndex(r, i);
Node lb =
nm->mkConstInt(Rational(t.getKind() == STRING_TO_UPPER ? 97 : 65));
return retNode;
}
}
+Node StringsPreprocess::mkCodePointAtIndex(Node x, Node i)
+{
+ // we use (SEQ_NTH, x, i) instead of
+ // (STRING_TO_CODE, (STRING_SUBSTR, x, i, 1))
+ NodeManager* nm = NodeManager::currentNM();
+ return nm->mkNode(SEQ_NTH, x, i);
+}
Node StringsPreprocess::processAssertion(Node n, std::vector<Node>& asserts)
{
* visited stores a cache of previous results.
*/
Node simplifyRec(Node t, std::vector<Node>& asserts);
+ /**
+ * Makes the term returning the code point of string x at point i.
+ */
+ static Node mkCodePointAtIndex(Node x, Node i);
};
} // namespace strings
{
Solver slv;
slv.setOption("produce-unsat-cores", "true");
+ slv.setOption("strings-exp", "true");
Sort s1 = slv.mkBitVectorSort(1);
Sort s2 = slv.mkFloatingPointSort(8, 24);
Term val = slv.mkBitVector(32, "10000000110010111010111011000101", 2);
regress1/strings/strip-endpt-sound.smt2
regress1/strings/substr001.smt2
regress1/strings/tolower-find.smt2
+ regress1/strings/to_upper_12.smt2
+ regress1/strings/to_upper_over_concat.smt2
regress1/strings/timeout-no-resp.smt2
regress1/strings/type002.smt2
regress1/strings/type003.smt2
+; COMMAND-LINE: --strings-exp
; EXPECT: sat
(set-logic ALL)
(set-option :nl-ext-ent-conf true)
-; COMMAND-LINE: --no-strings-lazy-pp
+; COMMAND-LINE: --no-strings-lazy-pp --strings-exp
; EXPECT: sat
(set-logic ALL)
(declare-fun a () Int)
+; COMMAND-LINE: --no-strings-code-elim
+; EXPECT: sat
(set-logic ALL)
(set-info :status sat)
(declare-const x String)
-; COMMAND-LINE: --learned-rewrite
+; COMMAND-LINE: --learned-rewrite --strings-exp
; EXPECT: sat
(set-logic ALL)
(declare-const a String)
-; COMMAND-LINE: --lang=smt2.6 --produce-models
+; COMMAND-LINE: --lang=smt2.6 --produce-models --no-strings-code-elim
; EXPECT: sat
; EXPECT: ((x "\u{a}"))
; EXPECT: ((y "\u{7f}"))
-; COMMAND-LINE: --decision=justification
+; COMMAND-LINE: --decision=justification --no-strings-code-elim
; EXPECT: sat
(set-logic ALL)
(declare-const size4 String)
-; COMMAND-LINE: --arith-eq-solver --ee-mode=distributed
-; COMMAND-LINE: --arith-eq-solver --ee-mode=central
+; COMMAND-LINE: --arith-eq-solver --ee-mode=distributed --strings-exp
+; COMMAND-LINE: --arith-eq-solver --ee-mode=central --strings-exp
; EXPECT: unsat
(set-logic ALL)
(declare-fun v () String)
+; COMMAND-LINE: --no-strings-code-elim
+; EXPECT: sat
(set-logic QF_SLIA)
(set-info :status sat)
(declare-fun x () String)
--- /dev/null
+; COMMAND-LINE: --strings-exp --seq-array=lazy
+; EXPECT: sat
+(set-logic QF_SLIA)
+(declare-const X String)
+(assert (= (str.to_upper X) (str.to_lower X)))
+(assert (>= (str.len X) 12))
+(check-sat)
--- /dev/null
+; COMMAND-LINE: --strings-exp --seq-array=lazy
+; EXPECT: sat
+(set-logic QF_SLIA)
+(declare-const x String)
+(declare-const y String)
+(declare-const z String)
+(declare-const w String)
+(assert (not (= (str.to_upper x) (str.to_lower x))))
+(assert (or (= x (str.++ y z)) (= x (str.++ y w))))
+(assert (not (= y "")))
+(assert (not (= z "")))
+(assert (= (str.to_upper y) (str.to_lower y)))
+(assert (= (str.to_upper z) (str.to_lower z)))
+(assert (> (str.len x) 2))
+(check-sat)