From 70dca46f1caf99a42c572bd5e245e827eb4e3c58 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Fri, 13 May 2022 11:21:52 -0500 Subject: [PATCH] Add utilities in preparation for supporting str.nth (#8766) Work towards efficient support for to_lower/to_upper. --- src/theory/evaluator.cpp | 17 +++++++++++++++++ src/theory/strings/kinds | 4 +++- src/theory/strings/theory_strings_utils.cpp | 17 ++++++++++++++++- src/theory/strings/theory_strings_utils.h | 6 ++++++ src/theory/strings/word.cpp | 19 +++++++++++++++++++ src/theory/strings/word.h | 6 ++++++ 6 files changed, 67 insertions(+), 2 deletions(-) diff --git a/src/theory/evaluator.cpp b/src/theory/evaluator.cpp index 51e0357e6..bd82153cd 100644 --- a/src/theory/evaluator.cpp +++ b/src/theory/evaluator.cpp @@ -591,6 +591,23 @@ EvalResult Evaluator::evalInternal( } break; } + case kind::SEQ_NTH: + { + // only strings evaluate + Assert (currNode[0].getType().isString()); + const String& s = results[currNode[0]].d_str; + Integer s_len(s.size()); + Integer i = results[currNode[1]].d_rat.getNumerator(); + if (i.strictlyNegative() || i >= s_len) + { + results[currNode] = EvalResult(s); + } + else + { + results[currNode] = EvalResult(Rational(s.getVec()[i.toUnsignedInt()])); + } + break; + } case kind::STRING_UPDATE: { diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 5f14efce7..8d9e427ca 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -33,7 +33,8 @@ operator STRING_IS_DIGIT 1 "string isdigit, returns true if argument is a string operator STRING_ITOS 1 "integer to string" operator STRING_STOI 1 "string to integer (total function)" operator STRING_TO_CODE 1 "string to code, returns the code of the first character of the string if it has length one, -1 otherwise" -operator STRING_FROM_CODE 1 "string from code, returns a string containing a single character whose code point matches the argument to this function, empty string if the argument is out-of-bounds" +operator STRING_FROM_CODE 1 "string from code, returns a string containing a single character whose code point matches the argument to this function, empty string if the argument is not a valid code point" +operator STRING_UNIT 1 "string unit, returns a string containing a single character whose code point matches the argument to this function, arbitrary string of length one if the argument is not a valid code point" operator STRING_TO_LOWER 1 "string to lowercase conversion" operator STRING_TO_UPPER 1 "string to uppercase conversion" operator STRING_REV 1 "string reverse" @@ -186,6 +187,7 @@ typerule STRING_ITOS "SimpleTypeRule" typerule STRING_STOI "SimpleTypeRule" typerule STRING_TO_CODE "SimpleTypeRule" typerule STRING_FROM_CODE "SimpleTypeRule" +typerule STRING_UNIT "SimpleTypeRule" typerule STRING_TO_UPPER "SimpleTypeRule" typerule STRING_TO_LOWER "SimpleTypeRule" diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index c45e062b4..34488806b 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -154,6 +154,18 @@ Node mkSuffix(Node t, Node n) STRING_SUBSTR, t, n, nm->mkNode(SUB, nm->mkNode(STRING_LENGTH, t), n)); } +Node mkUnit(TypeNode tn, Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + if (tn.isString()) + { + return nm->mkNode(STRING_UNIT, n); + } + Assert(tn.isSequence()); + TypeNode etn = tn.getSequenceElementType(); + return nm->mkSeqUnit(etn, n); +} + Node getConstantComponent(Node t) { if (t.getKind() == STRING_TO_REGEXP) @@ -259,7 +271,10 @@ std::pair > collectEmptyEqs(Node x) allEmptyEqs, std::vector(emptyNodes.begin(), emptyNodes.end())); } -bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; } +bool isConstantLike(Node n) +{ + return n.isConst() || n.getKind() == SEQ_UNIT || n.getKind() == STRING_UNIT; +} bool isUnboundedWildcard(const std::vector& rs, size_t start) { diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h index 5c7a55c26..e6671d87e 100644 --- a/src/theory/strings/theory_strings_utils.h +++ b/src/theory/strings/theory_strings_utils.h @@ -70,6 +70,12 @@ Node mkPrefix(Node t, Node n); */ Node mkSuffix(Node t, Node n); +/** + * Make a unit, returns either (str.unit n) or (seq.unit n) depending + * on if tn is a string or a sequence. + */ +Node mkUnit(TypeNode tn, Node n); + /** * Get constant component. Returns the string constant represented by the * string or regular expression t. For example: diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp index da1236662..7836b446e 100644 --- a/src/theory/strings/word.cpp +++ b/src/theory/strings/word.cpp @@ -124,6 +124,25 @@ std::vector Word::getChars(TNode x) return ret; } +Node Word::getNth(TNode x, size_t n) +{ + Kind k = x.getKind(); + if (k == CONST_STRING) + { + const std::vector& vec = x.getConst().getVec(); + Assert(n < vec.size()); + return NodeManager::currentNM()->mkConstInt(vec[n]); + } + else if (k == CONST_SEQUENCE) + { + const std::vector& vec = x.getConst().getVec(); + Assert(n < vec.size()); + return vec[n]; + } + Unimplemented(); + return Node::null(); +} + bool Word::isEmpty(TNode x) { return x.isConst() && getLength(x) == 0; } bool Word::strncmp(TNode x, TNode y, std::size_t n) diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h index 75c83f80c..72c311120 100644 --- a/src/theory/strings/word.h +++ b/src/theory/strings/word.h @@ -46,6 +46,12 @@ class Word * concatenation is equivalent to x. */ static std::vector getChars(TNode x); + /** + * Get nth. If x is a string constant, this returns the constant integer + * corresponding to the code point of x at position n. If x is a sequence + * constant, then this returns the nth element of x. + */ + static Node getNth(TNode x, size_t n); /** Return true if x is empty */ static bool isEmpty(TNode x); -- 2.30.2