Fix `str.replace_re` and `str.replace_re_all` (#6615)

author Andres Noetzli <andres.noetzli@gmail.com>

Thu, 27 May 2021 22:42:10 +0000 (15:42 -0700)

committer GitHub <noreply@github.com>

Thu, 27 May 2021 22:42:10 +0000 (22:42 +0000)
author Andres Noetzli <andres.noetzli@gmail.com>
Thu, 27 May 2021 22:42:10 +0000 (15:42 -0700)
committer GitHub <noreply@github.com>
Thu, 27 May 2021 22:42:10 +0000 (22:42 +0000)
diff --git a/NEWS b/NEWS

index 26e0edbb6f4b9d309a3a508e14a422268fc85619..54575dcc29b428a84536f3ac44ff1a3c31fcff53 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,10 @@ New Features:
    if arrays `a` and `b` are equal on all indices within indices `i` and `j`.
  * Support for an integer operator `(_ iand n)` that returns the bitwise `and`
    of two integers, seen as integers modulo n.
+* Strings:
+  * Support for `str.indexof_re(s, r, n)`, which returns the index of the first
+    occurrence of a regular expression `r` in a string `s` after index `n` or
+    -1 if `r` does not match a substring after `n`.
  
  Improvements:
  * New API: Added functions to retrieve the heap/nil term when using separation
diff --git a/src/api/cpp/cvc5.cpp b/src/api/cpp/cvc5.cpp

index 16366c3cfee9bbf51c479085f8e1c815bd883390..0f2d5ad1b132e328df3849e635ee63c3339c4abf 100644 (file)
--- a/src/api/cpp/cvc5.cpp
+++ b/src/api/cpp/cvc5.cpp
@@ -312,6 +312,7 @@ const static std::unordered_map<Kind, cvc5::Kind> s_kinds{
      {STRING_CHARAT, cvc5::Kind::STRING_CHARAT},
      {STRING_CONTAINS, cvc5::Kind::STRING_STRCTN},
      {STRING_INDEXOF, cvc5::Kind::STRING_STRIDOF},
+    {STRING_INDEXOF_RE, cvc5::Kind::STRING_INDEXOF_RE},
      {STRING_REPLACE, cvc5::Kind::STRING_STRREPL},
      {STRING_REPLACE_ALL, cvc5::Kind::STRING_STRREPLALL},
      {STRING_REPLACE_RE, cvc5::Kind::STRING_REPLACE_RE},
@@ -618,6 +619,7 @@ const static std::unordered_map<cvc5::Kind, Kind, cvc5::kind::KindHashFunction>
          {cvc5::Kind::STRING_CHARAT, STRING_CHARAT},
          {cvc5::Kind::STRING_STRCTN, STRING_CONTAINS},
          {cvc5::Kind::STRING_STRIDOF, STRING_INDEXOF},
+        {cvc5::Kind::STRING_INDEXOF_RE, STRING_INDEXOF_RE},
          {cvc5::Kind::STRING_STRREPL, STRING_REPLACE},
          {cvc5::Kind::STRING_STRREPLALL, STRING_REPLACE_ALL},
          {cvc5::Kind::STRING_REPLACE_RE, STRING_REPLACE_RE},
diff --git a/src/api/cpp/cvc5_kind.h b/src/api/cpp/cvc5_kind.h

index 6f6bf1d0ee37bee94227b8f18879f2e616d3f9c0..4263eedb2d80ac5e3075716d078beb9170b06777 100644 (file)
--- a/src/api/cpp/cvc5_kind.h
+++ b/src/api/cpp/cvc5_kind.h
@@ -2630,6 +2630,22 @@ enum CVC5_EXPORT Kind : int32_t
     *   - `Solver::mkTerm(Kind kind, const std::vector<Term>& children) const`
     */
    STRING_INDEXOF,
+  /**
+   * String index-of regular expression match.
+   * Returns the first match of a regular expression r in a string s. If the
+   * index is negative or greater than the length of string s1, or r does not
+   * match a substring in s after index i, the result is -1.
+   *
+   * Parameters:
+   *   - 1: Term of sort String (string s)
+   *   - 2: Term of sort RegLan (regular expression r)
+   *   - 3: Term of sort Integer (index i)
+   *
+   * Create with:
+   *   - `Solver::mkTerm(Kind kind, const Term& child1, const Term& child2, const Term& child3) const`
+   *   - `Solver::mkTerm(Kind kind, const std::vector<Term>& children) const`
+   */
+  STRING_INDEXOF_RE,
    /**
     * String replace.
     * Replaces a string s2 in a string s1 with string s3. If s2 does not appear
diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp

index 88f4b4ef8739e3d4b984f27b453a303f7ab65332..d32f149bf6d0f8fd1649cc7c886199caaff26848 100644 (file)
--- a/src/parser/smt2/smt2.cpp
+++ b/src/parser/smt2/smt2.cpp
@@ -158,6 +158,7 @@ void Smt2::addStringOperators() {
    addOperator(api::STRING_REPLACE_RE_ALL, "str.replace_re_all");
    if (!strictModeEnabled())
    {
+    addOperator(api::STRING_INDEXOF_RE, "str.indexof_re");
      addOperator(api::STRING_UPDATE, "str.update");
      addOperator(api::STRING_TOLOWER, "str.tolower");
      addOperator(api::STRING_TOUPPER, "str.toupper");
diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp

index 6258834c43b09aa929070bdeebbe7a606ce60bd0..4607d2747495a7bea06571052654b64d186a7378 100644 (file)
--- a/src/printer/smt2/smt2_printer.cpp
+++ b/src/printer/smt2/smt2_printer.cpp
@@ -723,6 +723,7 @@ void Smt2Printer::toStream(std::ostream& out,
    case kind::STRING_CHARAT:
    case kind::STRING_STRCTN:
    case kind::STRING_STRIDOF:
+  case kind::STRING_INDEXOF_RE:
    case kind::STRING_STRREPL:
    case kind::STRING_STRREPLALL:
    case kind::STRING_REPLACE_RE:
@@ -1309,6 +1310,7 @@ std::string Smt2Printer::smtKindString(Kind k, Variant v)
    case kind::STRING_STRCTN: return "str.contains" ;
    case kind::STRING_CHARAT: return "str.at" ;
    case kind::STRING_STRIDOF: return "str.indexof" ;
+  case kind::STRING_INDEXOF_RE: return "str.indexof_re";
    case kind::STRING_STRREPL: return "str.replace" ;
    case kind::STRING_STRREPLALL: return "str.replace_all";
    case kind::STRING_REPLACE_RE: return "str.replace_re";
diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp

index a1de5e2951cb0cb52fb94a93fda03c0f21e352a1..9576c1d81b106a66b6c65bd735b348d4dced1bda 100644 (file)
--- a/src/theory/strings/extf_solver.cpp
+++ b/src/theory/strings/extf_solver.cpp
@@ -53,6 +53,7 @@ ExtfSolver::ExtfSolver(SolverState& s,
    d_extt.addFunctionKind(kind::STRING_SUBSTR);
    d_extt.addFunctionKind(kind::STRING_UPDATE);
    d_extt.addFunctionKind(kind::STRING_STRIDOF);
+  d_extt.addFunctionKind(kind::STRING_INDEXOF_RE);
    d_extt.addFunctionKind(kind::STRING_ITOS);
    d_extt.addFunctionKind(kind::STRING_STOI);
    d_extt.addFunctionKind(kind::STRING_STRREPL);
@@ -191,11 +192,11 @@ bool ExtfSolver::doReduction(int effort, Node n)
    {
      NodeManager* nm = NodeManager::currentNM();
      Assert(k == STRING_SUBSTR || k == STRING_UPDATE || k == STRING_STRCTN
-           || k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
-           || k == STRING_STRREPL || k == STRING_STRREPLALL || k == SEQ_NTH
-           || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
-           || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER
-           || k == STRING_REV);
+           || k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_ITOS
+           || k == STRING_STOI || k == STRING_STRREPL || k == STRING_STRREPLALL
+           || k == SEQ_NTH || k == STRING_REPLACE_RE
+           || k == STRING_REPLACE_RE_ALL || k == STRING_LEQ
+           || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV);
      std::vector<Node> new_nodes;
      Node res = d_preproc.simplify(n, new_nodes);
      Assert(res != n);
diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds

index 1353ca9643241376b8c3c3078697aa8048046d02..743a5c0069c45f5003066b0da8f24777d3cf57fd 100644 (file)
--- a/src/theory/strings/kinds
+++ b/src/theory/strings/kinds
@@ -21,13 +21,12 @@ operator STRING_CHARAT 2 "string charat"
  operator STRING_STRCTN 2 "string contains"
  operator STRING_LT 2 "string less than"
  operator STRING_LEQ 2 "string less than or equal"
-operator STRING_STRIDOF 3 "string indexof"
+operator STRING_STRIDOF 3 "string index of substring"
+operator STRING_INDEXOF_RE 3 "string index of regular expression match"
  operator STRING_STRREPL 3 "string replace"
  operator STRING_STRREPLALL 3 "string replace all"
  operator STRING_REPLACE_RE 3 "string replace regular expression match"
  operator STRING_REPLACE_RE_ALL 3 "string replace all regular expression matches"
-typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>"
-typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>"
  operator STRING_PREFIX 2 "string prefixof"
  operator STRING_SUFFIX 2 "string suffixof"
  operator STRING_IS_DIGIT 1 "string isdigit, returns true if argument is a string of length one that represents a digit"
@@ -152,8 +151,11 @@ typerule STRING_UPDATE ::cvc5::theory::strings::StringUpdateTypeRule
  typerule STRING_CHARAT ::cvc5::theory::strings::StringAtTypeRule
  typerule STRING_STRCTN ::cvc5::theory::strings::StringRelationTypeRule
  typerule STRING_STRIDOF ::cvc5::theory::strings::StringIndexOfTypeRule
+typerule STRING_INDEXOF_RE "SimpleTypeRule<RInteger, AString, ARegExp, AInteger>"
  typerule STRING_STRREPL ::cvc5::theory::strings::StringReplaceTypeRule
  typerule STRING_STRREPLALL ::cvc5::theory::strings::StringReplaceTypeRule
+typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>"
+typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>"
  typerule STRING_PREFIX ::cvc5::theory::strings::StringStrToBoolTypeRule
  typerule STRING_SUFFIX ::cvc5::theory::strings::StringStrToBoolTypeRule
  typerule STRING_REV ::cvc5::theory::strings::StringStrToStrTypeRule
diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp

index 0c4e35df747bcd8e5e5c064529f3f28c0698c273..549d33efcbc4eda70106e303e5f4862f1a79654e 100644 (file)
--- a/src/theory/strings/rewrites.cpp
+++ b/src/theory/strings/rewrites.cpp
@@ -68,6 +68,10 @@ const char* toString(Rewrite r)
      case Rewrite::IDOF_PULL_ENDPT: return "IDOF_PULL_ENDPT";
      case Rewrite::IDOF_STRIP_CNST_ENDPTS: return "IDOF_STRIP_CNST_ENDPTS";
      case Rewrite::IDOF_STRIP_SYM_LEN: return "IDOF_STRIP_SYM_LEN";
+    case Rewrite::INDEXOF_RE_EMP_RE: return "INDEXOF_RE_EMP_RE";
+    case Rewrite::INDEXOF_RE_EVAL: return "INDEXOF_RE_EVAL";
+    case Rewrite::INDEXOF_RE_INVALID_INDEX: return "INDEXOF_RE_INVALID_INDEX";
+    case Rewrite::INDEXOF_RE_MAX_INDEX: return "INDEXOF_RE_MAX_INDEX";
      case Rewrite::ITOS_EVAL: return "ITOS_EVAL";
      case Rewrite::RE_AND_EMPTY: return "RE_AND_EMPTY";
      case Rewrite::RE_ANDOR_FLATTEN: return "RE_ANDOR_FLATTEN";
diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h

index 482666faa386b5e43a70682f981eb293175809a6..5e63c55c8e76f3a5fad382aeccbdd6c7b8be348e 100644 (file)
--- a/src/theory/strings/rewrites.h
+++ b/src/theory/strings/rewrites.h
@@ -73,6 +73,10 @@ enum class Rewrite : uint32_t
    IDOF_PULL_ENDPT,
    IDOF_STRIP_CNST_ENDPTS,
    IDOF_STRIP_SYM_LEN,
+  INDEXOF_RE_EMP_RE,
+  INDEXOF_RE_EVAL,
+  INDEXOF_RE_INVALID_INDEX,
+  INDEXOF_RE_MAX_INDEX,
    ITOS_EVAL,
    RE_AND_EMPTY,
    RE_ANDOR_FLATTEN,
diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp

index 15c37070a7569c62f0b972c5c8c86ed3ef4fcfc2..7ef1242c6dded7b6aeaa59088d288c326ee6eac1 100644 (file)
--- a/src/theory/strings/sequences_rewriter.cpp
+++ b/src/theory/strings/sequences_rewriter.cpp
@@ -1461,6 +1461,10 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node)
    {
      retNode = rewriteIndexof(node);
    }
+  else if (nk == kind::STRING_INDEXOF_RE)
+  {
+    retNode = rewriteIndexofRe(node);
+  }
    else if (nk == kind::STRING_STRREPL)
    {
      retNode = rewriteReplace(node);
@@ -2529,6 +2533,59 @@ Node SequencesRewriter::rewriteIndexof(Node node)
    return node;
  }
  
+Node SequencesRewriter::rewriteIndexofRe(Node node)
+{
+  Assert(node.getKind() == STRING_INDEXOF_RE);
+  NodeManager* nm = NodeManager::currentNM();
+  Node s = node[0];
+  Node r = node[1];
+  Node n = node[2];
+  Node zero = nm->mkConst(Rational(0));
+  Node slen = nm->mkNode(STRING_LENGTH, s);
+
+  if (ArithEntail::check(zero, n, true) || ArithEntail::check(n, slen, true))
+  {
+    Node ret = nm->mkConst(Rational(-1));
+    return returnRewrite(node, ret, Rewrite::INDEXOF_RE_INVALID_INDEX);
+  }
+
+  if (RegExpEntail::isConstRegExp(r))
+  {
+    if (s.isConst() && n.isConst())
+    {
+      Rational nrat = n.getConst<Rational>();
+      cvc5::Rational rMaxInt(cvc5::String::maxSize());
+      if (nrat > rMaxInt)
+      {
+        // We know that, due to limitations on the size of string constants
+        // in our implementation, that accessing a position greater than
+        // rMaxInt is guaranteed to be out of bounds.
+        Node negone = nm->mkConst(Rational(-1));
+        return returnRewrite(node, negone, Rewrite::INDEXOF_RE_MAX_INDEX);
+      }
+
+      uint32_t start = nrat.getNumerator().toUnsignedInt();
+      Node rem = nm->mkConst(s.getConst<String>().substr(start));
+      std::pair<size_t, size_t> match = firstMatch(rem, r);
+      Node ret = nm->mkConst(
+          Rational(match.first == string::npos
+                       ? -1
+                       : static_cast<int64_t>(start + match.first)));
+      return returnRewrite(node, ret, Rewrite::INDEXOF_RE_EVAL);
+    }
+
+    if (ArithEntail::check(n, zero) && ArithEntail::check(slen, n))
+    {
+      String emptyStr("");
+      if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, r))
+      {
+        return returnRewrite(node, n, Rewrite::INDEXOF_RE_EMP_RE);
+      }
+    }
+  }
+  return node;
+}
+
  Node SequencesRewriter::rewriteReplace(Node node)
  {
    Assert(node.getKind() == kind::STRING_STRREPL);
@@ -3150,7 +3207,7 @@ Node SequencesRewriter::rewriteReplaceReAll(Node node)
        std::vector<Node> res;
        String rem = x.getConst<String>();
        std::pair<size_t, size_t> match(0, 0);
-      while (rem.size() >= 0)
+      while (rem.size() != 0)
        {
          match = firstMatch(nm->mkConst(rem), yp);
          if (match.first == string::npos)
diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h

index 7af24596ac2d6dff464c008907e99e48e1b8a7aa..37ed787860858ab329ade53da46224a6d9855e7d 100644 (file)
--- a/src/theory/strings/sequences_rewriter.h
+++ b/src/theory/strings/sequences_rewriter.h
@@ -194,6 +194,12 @@ class SequencesRewriter : public TheoryRewriter
     * Returns the rewritten form of node.
     */
    Node rewriteIndexof(Node node);
+  /** rewrite indexof regular expression match
+   * This is the entry point for post-rewriting terms n of the form
+   *   str.indexof_re( s, r, n )
+   * Returns the rewritten form of node.
+   */
+  Node rewriteIndexofRe(Node node);
    /** rewrite replace
     * This is the entry point for post-rewriting terms n of the form
     *   str.replace( s, t, r )
diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp

index 9b23301f3b173c920cba7d3ac9c8f95066e328b5..913518bc8b5d79485b66e485c67ff4a035554d8c 100644 (file)
--- a/src/theory/strings/skolem_cache.cpp
+++ b/src/theory/strings/skolem_cache.cpp
@@ -40,6 +40,16 @@ struct IndexVarAttributeId
  };
  typedef expr::Attribute<IndexVarAttributeId, Node> IndexVarAttribute;
  
+/**
+ * A bound variable corresponding to the universally quantified integer
+ * variable used to range over the valid lengths of a string, used for
+ * axiomatizing the behavior of some term.
+ */
+struct LengthVarAttributeId
+{
+};
+typedef expr::Attribute<LengthVarAttributeId, Node> LengthVarAttribute;
+
  SkolemCache::SkolemCache(bool useOpts) : d_useOpts(useOpts)
  {
    NodeManager* nm = NodeManager::currentNM();
@@ -300,6 +310,14 @@ Node SkolemCache::mkIndexVar(Node t)
    return bvm->mkBoundVar<IndexVarAttribute>(t, intType);
  }
  
+Node SkolemCache::mkLengthVar(Node t)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  TypeNode intType = nm->integerType();
+  BoundVarManager* bvm = nm->getBoundVarManager();
+  return bvm->mkBoundVar<LengthVarAttribute>(t, intType);
+}
+
  }  // namespace strings
  }  // namespace theory
  }  // namespace cvc5
diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h

index 126ee313d624c0999a7b1a07319ced514f572dbe..f0376dbc6037754e05b8374a86a812b92157e850 100644 (file)
--- a/src/theory/strings/skolem_cache.h
+++ b/src/theory/strings/skolem_cache.h
@@ -107,9 +107,14 @@ class SkolemCache
      // in_re(a, re.++(_*, b, _*)) =>
      //    exists k_pre, k_match, k_post.
      //       a = k_pre ++ k_match ++ k_post ^
-    //       ~in_re(k_pre ++ substr(k_match, 0, str.len(k_match) - 1),
-    //              re.++(_*, b, _*)) ^
-    //       in_re(k2, y)
+    //       len(k_pre) = indexof_re(x, y, 0) ^
+    //       (forall l. 0 < l < len(k_match) =>
+    //         ~in_re(substr(k_match, 0, l), r)) ^
+    //       in_re(k_match, b)
+    //
+    // k_pre is the prefix before the first, shortest match of b in a. k_match
+    // is the substring of a matched by b. It is either empty or there is no
+    // shorter string that matches b.
      SK_FIRST_MATCH_PRE,
      SK_FIRST_MATCH,
      SK_FIRST_MATCH_POST,
@@ -180,6 +185,16 @@ class SkolemCache
     */
    static Node mkIndexVar(Node t);
  
+  /** Make length variable
+   *
+   * This returns an integer variable of kind BOUND_VARIABLE that is used for
+   * axiomatizing the behavior of a term or predicate t. It refers to lengths
+   * of strings in the reduction of t. For example, the length variable for the
+   * term str.indexof(s, r, n) is used to quantify over the lengths of strings
+   * that could be matched by r.
+   */
+  static Node mkLengthVar(Node t);
+
   private:
    /**
     * Simplifies the arguments for a string skolem used for indexing into the
diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp

index af2eecde88c9779e93bc87295476dfa13aa709ce..c7b3b5300e91137f6b3fd03df72dbdbe557b41bd 100644 (file)
--- a/src/theory/strings/term_registry.cpp
+++ b/src/theory/strings/term_registry.cpp
@@ -88,10 +88,11 @@ Node TermRegistry::eagerReduce(Node t, SkolemCache* sc)
              LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
      lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
    }
-  else if (tk == STRING_STRIDOF)
+  else if (tk == STRING_STRIDOF || tk == STRING_INDEXOF_RE)
    {
-    // (and (>= (str.indexof x y n) (- 1)) (<= (str.indexof x y n) (str.len
-    // x)))
+    // (and (>= (f x y n) (- 1)) (<= (f x y n) (str.len x)))
+    //
+    // where f in { str.indexof, str.indexof_re }
      Node l = nm->mkNode(STRING_LENGTH, t[0]);
      lemma = nm->mkNode(AND,
                         nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))),
@@ -143,11 +144,12 @@ void TermRegistry::preRegisterTerm(TNode n)
    Kind k = n.getKind();
    if (!options::stringExp())
    {
-    if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
-        || k == STRING_STRREPL || k == STRING_SUBSTR || k == STRING_STRREPLALL
-        || k == SEQ_NTH || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
-        || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER
-        || k == STRING_TOUPPER || k == STRING_REV || k == STRING_UPDATE)
+    if (k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_ITOS
+        || k == STRING_STOI || k == STRING_STRREPL || k == STRING_SUBSTR
+        || k == STRING_STRREPLALL || k == SEQ_NTH || k == STRING_REPLACE_RE
+        || k == STRING_REPLACE_RE_ALL || k == STRING_STRCTN || k == STRING_LEQ
+        || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV
+        || k == STRING_UPDATE)
      {
        std::stringstream ss;
        ss << "Term of kind " << k
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp

index 06a39ec648888dd86e2198c625f09265ddad1f6c..b1f5a0765bd7538e888aba72832c3e13bf977a84 100644 (file)
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -130,6 +130,7 @@ void TheoryStrings::finishInit()
    d_equalityEngine->addFunctionKind(kind::STRING_ITOS, eagerEval);
    d_equalityEngine->addFunctionKind(kind::STRING_STOI, eagerEval);
    d_equalityEngine->addFunctionKind(kind::STRING_STRIDOF, eagerEval);
+  d_equalityEngine->addFunctionKind(kind::STRING_INDEXOF_RE, eagerEval);
    d_equalityEngine->addFunctionKind(kind::STRING_STRREPL, eagerEval);
    d_equalityEngine->addFunctionKind(kind::STRING_STRREPLALL, eagerEval);
    d_equalityEngine->addFunctionKind(kind::STRING_REPLACE_RE, eagerEval);
diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp

index 356ae28acba6ba30d887a14bf5ebd327fd59b5e1..83a4fa04a352fb193be727ef1162a8ed82178dda 100644 (file)
--- a/src/theory/strings/theory_strings_preprocess.cpp
+++ b/src/theory/strings/theory_strings_preprocess.cpp
@@ -253,6 +253,89 @@ Node StringsPreprocess::reduce(Node t,
      // Thus, indexof( x, y, n ) = skk.
      retNode = skk;
    }
+  else if (t.getKind() == kind::STRING_INDEXOF_RE)
+  {
+    // processing term:  indexof_re(s, r, n)
+    Node s = t[0];
+    Node r = t[1];
+    Node n = t[2];
+    Node skk = sc->mkTypedSkolemCached(
+        nm->integerType(), t, SkolemCache::SK_PURIFY, "iork");
+    Node sLen = nm->mkNode(STRING_LENGTH, s);
+
+    // n > len(x)
+    Node ub = nm->mkNode(GT, n, sLen);
+    // 0 > n
+    Node lb = nm->mkNode(GT, zero, n);
+    // n > len(x) OR 0 > n
+    Node condNegOne = nm->mkNode(OR, ub, lb);
+    // skk = -1
+    Node retNegOne = skk.eqNode(negOne);
+
+    Node emp = Word::mkEmptyWord(s.getType());
+    // in_re("", r)
+    Node matchEmptyStr = nm->mkNode(STRING_IN_REGEXP, emp, r);
+    // skk = n
+    Node retN = skk.eqNode(n);
+
+    Node i = SkolemCache::mkIndexVar(t);
+    Node l = SkolemCache::mkLengthVar(t);
+    Node bvl = nm->mkNode(BOUND_VAR_LIST, i, l);
+    Node bound =
+        nm->mkNode(AND,
+                   {
+                       nm->mkNode(GEQ, i, n),
+                       nm->mkNode(LT, i, nm->mkNode(ITE, retNegOne, sLen, skk)),
+                       nm->mkNode(GT, l, zero),
+                       nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, i)),
+                   });
+    Node body = nm->mkNode(
+        OR,
+        bound.negate(),
+        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, i, l), r)
+            .negate());
+    // forall il.
+    //   n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i =>
+    //     ~in_re(substr(s, i, l), r)
+    Node firstMatch = mkForallInternal(bvl, body);
+    Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
+    Node validLen =
+        nm->mkNode(AND,
+                   nm->mkNode(GEQ, l, n),
+                   nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, skk)));
+    Node matchBody = nm->mkNode(
+        AND,
+        validLen,
+        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, skk, l), r));
+    // skk != -1 =>
+    //   exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r))
+    Node match = nm->mkNode(
+        OR, retNegOne, mkForallInternal(bvll, matchBody.negate()).negate());
+
+    // assert:
+    // IF:   n > len(s) OR 0 > n
+    // THEN: skk = -1
+    // ELIF: in_re("", r)
+    // THEN: skk = n
+    // ELSE: (forall il.
+    //         n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i =>
+    //           ~in_re(substr(s, i, l), r)) ^
+    //       (skk != -1 =>
+    //          exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r))
+    //
+    // Note that this reduction relies on eager reduction lemmas being sent to
+    // properly limit the range of skk.
+    Node rr = nm->mkNode(
+        ITE,
+        condNegOne,
+        retNegOne,
+        nm->mkNode(
+            ITE, matchEmptyStr, retN, nm->mkNode(AND, firstMatch, match)));
+    asserts.push_back(rr);
+
+    // Thus, indexof_re(s, r, n) = skk.
+    retNode = skk;
+  }
    else if (t.getKind() == STRING_ITOS)
    {
      // processing term:  int.to.str( n )
@@ -596,17 +679,14 @@ Node StringsPreprocess::reduce(Node t,
    }
    else if (t.getKind() == STRING_REPLACE_RE)
    {
+    // processing term: replace_re( x, y, z )
      Node x = t[0];
      Node y = t[1];
      Node z = t[2];
      Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k");
  
-    std::vector<Node> emptyVec;
-    Node sigmaStar =
-        nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
-    Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, y, sigmaStar);
-    // in_re(x, re.++(_*, y, _*))
-    Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re);
+    // indexof_re(x, y, 0)
+    Node idx = nm->mkNode(STRING_INDEXOF_RE, x, y, zero);
  
      // in_re("", y)
      Node matchesEmpty =
@@ -620,42 +700,40 @@ Node StringsPreprocess::reduce(Node t,
          sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match");
      Node k3 =
          sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post");
+    Node k2Len = nm->mkNode(STRING_LENGTH, k2);
      // x = k1 ++ k2 ++ k3
-    Node splitX = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3));
-    // ~in_re(k1 ++ str.substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*))
-    Node k2len = nm->mkNode(STRING_LENGTH, k2);
-    Node firstMatch =
-        nm->mkNode(
-              STRING_IN_REGEXP,
-              nm->mkNode(
-                  STRING_CONCAT,
-                  k1,
-                  nm->mkNode(
-                      STRING_SUBSTR, k2, zero, nm->mkNode(MINUS, k2len, one))),
-              re)
-            .negate();
+    Node split = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3));
+    // len(k1) = indexof_re(x, y, 0)
+    Node k1Len = nm->mkNode(STRING_LENGTH, k1).eqNode(idx);
+    Node l = SkolemCache::mkLengthVar(t);
+    Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
+    Node bound =
+        nm->mkNode(AND, nm->mkNode(LEQ, zero, l), nm->mkNode(LT, l, k2Len));
+    Node body = nm->mkNode(
+        OR,
+        bound.negate(),
+        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, k2, zero, l), y)
+            .negate());
+    // forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)
+    Node shortestMatch = mkForallInternal(bvll, body);
      // in_re(k2, y)
-    Node k2Match = nm->mkNode(STRING_IN_REGEXP, k2, y);
+    Node match = nm->mkNode(STRING_IN_REGEXP, k2, y);
      // k = k1 ++ z ++ k3
-    Node res2 = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3));
+    Node res = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3));
  
-    // IF in_re(x, re.++(_*, y, _*))
-    // THEN:
-    //   IF in_re("", y)
-    //   THEN: k = z ++ x
-    //   ELSE:
-    //     x = k1 ++ k2 ++ k3 ^
-    //     ~in_re(k1 ++ substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*)) ^
-    //     in_re(k2, y) ^ k = k1 ++ z ++ k3
-    // ELSE: k = x
-    asserts.push_back(nm->mkNode(
-        ITE,
-        hasMatch,
+    // IF: indexof_re(x, y, 0) = -1
+    // THEN: k = x
+    // ELSE:
+    //   x = k1 ++ k2 ++ k3 ^
+    //   len(k1) = indexof_re(x, y, 0) ^
+    //   (forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)) ^
+    //   in_re(k2, y) ^
+    //   k = k1 ++ z ++ k3
+    asserts.push_back(
          nm->mkNode(ITE,
-                   matchesEmpty,
-                   res1,
-                   nm->mkNode(AND, {splitX, firstMatch, k2Match, res2})),
-        k.eqNode(x)));
+                   idx.eqNode(negOne),
+                   k.eqNode(x),
+                   nm->mkNode(AND, {split, k1Len, shortestMatch, match, res})));
      retNode = k;
    }
    else if (t.getKind() == STRING_REPLACE_RE_ALL)
@@ -679,13 +757,10 @@ Node StringsPreprocess::reduce(Node t,
  
      Node emp = Word::mkEmptyWord(t.getType());
  
-    std::vector<Node> emptyVec;
-    Node sigmaStar =
-        nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
      Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp));
-    Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, yp, sigmaStar);
-    // in_re(x, _* ++ y' ++ _*)
-    Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re);
+    Node idx = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero);
+    // indexof_re(x, y', 0) = -1
+    Node noMatch = idx.eqNode(negOne);
  
      Node ufno = nm->mkNode(APPLY_UF, uf, numOcc);
      Node usno = nm->mkNode(APPLY_UF, us, numOcc);
@@ -700,8 +775,9 @@ Node StringsPreprocess::reduce(Node t,
      lemmas.push_back(usno.eqNode(rem));
      // Uf(0) = 0
      lemmas.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero));
-    // not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*)))
-    lemmas.push_back(nm->mkNode(STRING_IN_REGEXP, rem, re).negate());
+    // indexof_re(substr(x, Uf(numOcc)), y', 0) = -1
+    lemmas.push_back(
+        nm->mkNode(STRING_INDEXOF_RE, rem, yp, zero).eqNode(negOne));
  
      Node i = SkolemCache::mkIndexVar(t);
      Node bvli = nm->mkNode(BOUND_VAR_LIST, i);
@@ -709,50 +785,57 @@ Node StringsPreprocess::reduce(Node t,
          nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc));
      Node ip1 = nm->mkNode(PLUS, i, one);
      Node ufi = nm->mkNode(APPLY_UF, uf, i);
-    Node uli = nm->mkNode(APPLY_UF, ul, i);
      Node ufip1 = nm->mkNode(APPLY_UF, uf, ip1);
-    Node ii = nm->mkNode(MINUS, ufip1, uli);
-    Node match = nm->mkNode(STRING_SUBSTR, x, ii, uli);
-    Node pfxMatch =
-        nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi));
-    Node nonMatch =
-        nm->mkNode(STRING_SUBSTR,
-                   x,
-                   ufi,
-                   nm->mkNode(MINUS, nm->mkNode(MINUS, ufip1, one), ufi));
+    Node ulip1 = nm->mkNode(APPLY_UF, ul, ip1);
+    // ii = Uf(i + 1) - Ul(i + 1)
+    Node ii = nm->mkNode(MINUS, ufip1, ulip1);
  
      std::vector<Node> flem;
      // Ul(i) > 0
-    flem.push_back(nm->mkNode(GT, uli, zero));
-    // Uf(i + 1) >= Uf(i) + Ul(i)
-    flem.push_back(nm->mkNode(GEQ, ufip1, nm->mkNode(PLUS, ufi, uli)));
-    // in_re(substr(x, ii, Ul(i)), y')
-    flem.push_back(nm->mkNode(STRING_IN_REGEXP, match, yp));
-    // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*))
-    flem.push_back(nm->mkNode(STRING_IN_REGEXP, nonMatch, re).negate());
+    flem.push_back(nm->mkNode(GT, ulip1, zero));
+    // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1)
+    flem.push_back(ufip1.eqNode(
+        nm->mkNode(PLUS, nm->mkNode(STRING_INDEXOF_RE, x, yp, ufi), ulip1)));
+    // in_re(substr(x, ii, Ul(i + 1)), y')
+    flem.push_back(nm->mkNode(
+        STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, ulip1), yp));
+    Node l = SkolemCache::mkLengthVar(t);
+    Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
+    Node lenBound =
+        nm->mkNode(AND, nm->mkNode(LT, zero, l), nm->mkNode(LT, l, ulip1));
+    Node shortestMatchBody = nm->mkNode(
+        OR,
+        lenBound.negate(),
+        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, l), y)
+            .negate());
+    // forall l. 0 < l < Ul(i + 1) =>
+    //   ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y')
+    flem.push_back(mkForallInternal(bvll, shortestMatchBody));
+    Node pfxMatch =
+        nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi));
      // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
      flem.push_back(
          nm->mkNode(APPLY_UF, us, i)
              .eqNode(nm->mkNode(
                  STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1))));
-
      Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem));
      Node forall = mkForallInternal(bvli, body);
      lemmas.push_back(forall);
  
-    // IF in_re(x, re.++(_*, y', _*))
-    // THEN:
+    // IF indexof(x, y', 0) = -1
+    // THEN: k = x
+    // ELSE:
      //   numOcc > 0 ^
      //   k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^
-    //   Uf(0) = 0 ^ not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*)))
+    //   Uf(0) = 0 ^ indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 ^
      //   forall i. 0 <= i < nummOcc =>
      //     Ul(i) > 0 ^
-    //     Uf(i + 1) >= Uf(i) + Ul(i) ^
-    //     in_re(substr(x, ii, Ul(i)), y') ^
-    //     ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*)) ^
+    //     Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) ^
+    //     in_re(substr(x, ii, Ul(i + 1)), y') ^
+    //     forall l. 0 < l < Ul(i + 1) =>
+    //       ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y')
      //     Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
      //     where ii = Uf(i + 1) - Ul(i)
-    // ELSE: k = x
      // where y' = re.diff(y, "")
      //
      // Conceptually, y' is the regex y but excluding the empty string (because
@@ -760,8 +843,17 @@ Node StringsPreprocess::reduce(Node t,
      // matches of y' in x, Uf(i) is the end position of the i-th match, Ul(i)
      // is the length of the i^th match, and Us(i) is the result of processing
      // the remainder after processing the i^th occurrence of y in x.
+    //
+    // Visualization of Uf(i) and Ul(i):
+    //
+    // x = |------------| match 1 |-----------| match 2   |---|
+    //     |                      |                       |
+    //     Uf(0)                  Uf(1)                   Uf(2)
+    //
+    //                  |---------|           |-----------|
+    //                    Ul(1)                 Ul(2)
      asserts.push_back(
-        nm->mkNode(ITE, hasMatch, nm->mkNode(AND, lemmas), k.eqNode(x)));
+        nm->mkNode(ITE, noMatch, k.eqNode(x), nm->mkNode(AND, lemmas)));
      retNode = k;
    }
    else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER)
diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp

index 6897d5272e51a5f90ccb86c7579a193a87500caf..c763557cab9ef625e019aefe43f6c2a6f2ffca78 100644 (file)
--- a/src/theory/strings/theory_strings_utils.cpp
+++ b/src/theory/strings/theory_strings_utils.cpp
@@ -383,8 +383,9 @@ TypeNode getOwnerStringType(Node n)
  {
    TypeNode tn;
    Kind k = n.getKind();
-  if (k == STRING_STRIDOF || k == STRING_LENGTH || k == STRING_STRCTN
-      || k == SEQ_NTH || k == STRING_PREFIX || k == STRING_SUFFIX)
+  if (k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH
+      || k == STRING_STRCTN || k == SEQ_NTH || k == STRING_PREFIX
+      || k == STRING_SUFFIX)
    {
      // owning string type is the type of first argument
      tn = n[0].getType();
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt

index 35d2553de78a3764eb828b4e7d859687056ca5a0..422acd0489baba77aa9d580a73f992c1a541798f 100644 (file)
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -1116,6 +1116,7 @@ set(regress_0_tests
    regress0/strings/idof-sem.smt2
    regress0/strings/ilc-like.smt2
    regress0/strings/indexof-sym-simp.smt2
+  regress0/strings/indexof_re.smt2
    regress0/strings/is_digit_simple.smt2
    regress0/strings/issue1189.smt2
    regress0/strings/issue2958.smt2
@@ -2047,6 +2048,7 @@ set(regress_1_tests
    regress1/strings/cmu-substr-rw.smt2
    regress1/strings/code-sequence.smt2
    regress1/strings/complement-test.smt2
+  regress1/strings/indexof_re_red.smt2
    regress1/strings/crash-1019.smt2
    regress1/strings/csp-prefix-exp-bug.smt2
    regress1/strings/double-replace.smt2
@@ -2089,6 +2091,8 @@ set(regress_1_tests
    regress1/strings/issue5692-infer-proxy.smt2
    regress1/strings/issue5940-skc-len-conc.smt2
    regress1/strings/issue5940-2-skc-len-conc.smt2
+  regress1/strings/issue6057-replace-re.smt2
+  regress1/strings/issue6057-replace-re-all-jiwonparc.smt2
    regress1/strings/issue6072-inc-no-const-reg.smt2
    regress1/strings/issue6075-repl-len-one-rr.smt2
    regress1/strings/issue6132-non-unique-skolem.smt2
@@ -2096,12 +2100,15 @@ set(regress_1_tests
    regress1/strings/issue6191-replace-all.smt2
    regress1/strings/issue6203-1-substr-ctn-strip.smt2
    regress1/strings/issue6203-2-re-ccache.smt2
+  regress1/strings/issue6203-6-replace-re.smt2
    regress1/strings/issue6214-2-sym-re-inc.smt2
    regress1/strings/issue6214-3-sym-re-inc.smt2
    regress1/strings/issue6214-4-sym-re-inc.smt2
    regress1/strings/issue6270.smt2
    regress1/strings/issue6271-rnf.smt2
    regress1/strings/issue6271-2-rnf.smt2
+  regress1/strings/issue6337-replace-re-all.smt2
+  regress1/strings/issue6337-replace-re.smt2
    regress1/strings/issue6567-empty-re-range.smt2
    regress1/strings/issue6604-2.smt2
    regress1/strings/kaluza-fl.smt2
@@ -2441,6 +2448,8 @@ set(regress_2_tests
    regress2/strings/issue3203.smt2
    regress2/strings/issue5381.smt2
    regress2/strings/issue6483.smt2
+  regress2/strings/issue6057-replace-re-all.smt2
+  regress2/strings/issue6057-replace-re-all-simplified.smt2
    regress2/strings/issue918.smt2
    regress2/strings/non_termination_regular_expression6.smt2
    regress2/strings/range-perf.smt2
@@ -2532,6 +2541,7 @@ set(regress_3_tests
    regress3/specsharp-WindowsCard.15.RTE.Terminate_System.Int32.smt2
    regress3/strings-any-term.sy
    regress3/strings/extf_d_perf.smt2
+  regress3/strings/indexof_re_red.smt2
    regress3/strings/norn-dis-0707-3.smt2
    regress3/strings/replace_re_all.smt2
    regress3/unbdd_inv_gen_ex7.sy
diff --git a/test/regress/regress0/strings/indexof_re.smt2 b/test/regress/regress0/strings/indexof_re.smt2

new file mode 100644 (file)

index 0000000..72c5d7e
--- /dev/null
+++ b/test/regress/regress0/strings/indexof_re.smt2
@@ -0,0 +1,18 @@
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-const x String)
+(assert (or
+  (not (= (str.indexof_re "abc" re.allchar (- 1)) (- 1)))
+  (not (= (str.indexof_re "abc" re.allchar (- 2)) (- 1)))
+  (not (= (str.indexof_re "abc" re.allchar 5) (- 1)))
+  (not (= (str.indexof_re "abc" re.allchar 0) 0))
+  (not (= (str.indexof_re "abc" re.allchar 1) 1))
+  (not (= (str.indexof_re "abc" re.allchar 2) 2))
+  (not (= (str.indexof_re "abc" re.allchar 3) (- 1)))
+  (not (= (str.indexof_re "abc" (re.++ re.allchar re.allchar) 2) (- 1)))
+  (not (= (str.indexof_re "abc" (re.union (str.to_re "") re.allchar) 3) 3))
+  (not (= (str.indexof_re (str.++ "abc" x) (re.union (str.to_re "") re.allchar) 3) 3))
+  (not (= (str.indexof_re "cbabc" (re.union (str.to_re "a") (str.to_re "bab")) 0) 1))
+))
+(set-info :status unsat)
+(check-sat)
diff --git a/test/regress/regress1/strings/indexof_re_red.smt2 b/test/regress/regress1/strings/indexof_re_red.smt2

new file mode 100644 (file)

index 0000000..f32a672
--- /dev/null
+++ b/test/regress/regress1/strings/indexof_re_red.smt2
@@ -0,0 +1,52 @@
+; COMMAND-LINE: --strings-exp --incremental
+(set-logic QF_SLIA)
+(declare-const x String)
+(declare-const i Int)
+
+(push)
+(assert (= i (str.indexof_re (str.++ x "abc") (re.++ re.allchar (str.to_re "b")) 0)))
+(assert (= i (str.len x)))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= i (str.indexof_re (str.++ x "abc") (re.++ re.allchar (str.to_re "b")) 0)))
+(assert (= i (+ (str.len x) 1)))
+(set-info :status unsat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= i (str.indexof_re x (re.++ re.all (str.to_re "a") re.all) 0)))
+(assert (= i 0))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= i (str.indexof_re x (re.++ re.all (str.to_re "a") re.all) 0)))
+(assert (= i (- 1)))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= 2 (str.indexof_re x (str.to_re "a") 1)))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= i (str.indexof_re x (str.to_re "") 0)))
+(assert (= i (str.len x)))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= i (str.indexof_re x (re.comp (str.to_re "")) 0)))
+(assert (= i (str.len x)))
+(set-info :status unsat)
+(check-sat)
+(pop)
diff --git a/test/regress/regress1/strings/issue6057-replace-re-all-jiwonparc.smt2 b/test/regress/regress1/strings/issue6057-replace-re-all-jiwonparc.smt2

new file mode 100644 (file)

index 0000000..c9d93d0
--- /dev/null
+++ b/test/regress/regress1/strings/issue6057-replace-re-all-jiwonparc.smt2
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun a () String)
+; A complicated way of saying a = "b"
+(assert (str.in_re a (re.++ (re.* (re.opt (str.to_re a))) (str.to_re "b"))))
+; Corresponds to replace_re_all("ab", a*b, "") contains "a"
+(assert (str.contains (str.replace_re_all (str.++ "a" a) (re.++ (re.* (str.to_re "a")) (str.to_re "b")) "") "a"))
+(set-info :status unsat)
+(check-sat)
diff --git a/test/regress/regress1/strings/issue6057-replace-re.smt2 b/test/regress/regress1/strings/issue6057-replace-re.smt2

new file mode 100644 (file)

index 0000000..192b244
--- /dev/null
+++ b/test/regress/regress1/strings/issue6057-replace-re.smt2
@@ -0,0 +1,7 @@
+(set-logic QF_SLIA)
+(declare-fun a () String)
+(assert (str.suffixof "a" a))
+(assert (str.in_re a (re.* (re.union (str.to_re (str.replace_re a (re.++ (re.* (str.to_re "a")) (str.to_re "ba")) "")) (str.to_re "b")))))
+(assert (str.in_re a (re.++ (re.* (str.to_re (ite (str.in_re a (re.* (str.to_re "b"))) "" "u"))) (re.* (str.to_re a)))))
+(set-info :status sat)
+(check-sat)
diff --git a/test/regress/regress1/strings/issue6203-6-replace-re.smt2 b/test/regress/regress1/strings/issue6203-6-replace-re.smt2

new file mode 100644 (file)

index 0000000..d5e6acd
--- /dev/null
+++ b/test/regress/regress1/strings/issue6203-6-replace-re.smt2
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun a () String)
+(assert (not (str.in_re (str.replace_re a (re.++ (re.* (re.union (str.to_re "a") (str.to_re ""))) (str.to_re "b")) "") (str.to_re ""))))
+(assert (ite (str.in_re a (re.diff (re.* (re.union (str.to_re "a") (str.to_re "b"))) (re.range "a" "u")))
+    (str.in_re a (re.diff (re.* (re.union (str.to_re "a") (str.to_re "b"))) (re.range "a" "u")))
+    (str.<= a "b")))
+(set-info :status sat)
+(check-sat)
diff --git a/test/regress/regress1/strings/issue6337-replace-re-all.smt2 b/test/regress/regress1/strings/issue6337-replace-re-all.smt2

new file mode 100644 (file)

index 0000000..9ae168b
--- /dev/null
+++ b/test/regress/regress1/strings/issue6337-replace-re-all.smt2
@@ -0,0 +1,10 @@
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun str3 () String)
+(declare-fun str8 () String)
+(declare-fun str12 () String)
+(declare-fun str14 () String)
+(declare-fun str15 () String)
+(assert (distinct str15 (str.++ str14 str3 str8 str14) (str.replace_re_all str12 (re.comp (str.to_re str15)) (str.++ str14 str3 str8 str14)) str12))
+(set-info :status sat)
+(check-sat)
diff --git a/test/regress/regress1/strings/issue6337-replace-re.smt2 b/test/regress/regress1/strings/issue6337-replace-re.smt2

new file mode 100644 (file)

index 0000000..78895b2
--- /dev/null
+++ b/test/regress/regress1/strings/issue6337-replace-re.smt2
@@ -0,0 +1,13 @@
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun a () String)
+(declare-fun b () String)
+(assert
+ (str.in_re
+  (str.replace_re (str.++ a "zb")
+   (re.union (str.to_re "z") (str.to_re "a")
+    (re.++ (re.* (str.to_re a))
+     (re.union (str.to_re "z") (str.to_re "a")))) b)
+  (re.opt (str.to_re "bb"))))
+(set-info :status sat)
+(check-sat)
diff --git a/test/regress/regress2/strings/issue6057-replace-re-all-simplified.smt2 b/test/regress/regress2/strings/issue6057-replace-re-all-simplified.smt2

new file mode 100644 (file)

index 0000000..83860ef
--- /dev/null
+++ b/test/regress/regress2/strings/issue6057-replace-re-all-simplified.smt2
@@ -0,0 +1,11 @@
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun literal_5 () String)                                               
+(assert (not (=
+  literal_5
+  (str.replace_re_all
+    literal_5
+    (re.++ (re.* re.allchar) (str.to_re "\u{5c}\u{3c}\u{53}\u{43}\u{52}\u{49}\u{50}\u{54}") (re.* re.allchar))
+    literal_5))))
+(set-info :status sat)
+(check-sat)  
diff --git a/test/regress/regress2/strings/issue6057-replace-re-all.smt2 b/test/regress/regress2/strings/issue6057-replace-re-all.smt2

new file mode 100644 (file)

index 0000000..9819b75
--- /dev/null
+++ b/test/regress/regress2/strings/issue6057-replace-re-all.smt2
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun literal_0 () String)
+(assert (and (str.<= (str.++ literal_0 "\u{2f}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2f}\u{74}\u{79}\u{70}\u{65}\u{2f}\u{6e}\u{75}\u{6d}\u{65}\u{72}\u{69}\u{63}\u{61}\u{6c}\u{2f}\u{65}\u{64}\u{69}\u{74}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2e}\u{68}\u{74}\u{6d}\u{6c}") 
+(str.replace_re_all (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0) (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0))) 
+(str.<= (str.++ literal_0 "\u{2f}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2f}\u{74}\u{79}\u{70}\u{65}\u{2f}\u{6e}\u{75}\u{6d}\u{65}\u{72}\u{69}\u{63}\u{61}\u{6c}\u{2f}\u{65}\u{64}\u{69}\u{74}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2e}\u{68}\u{74}\u{6d}\u{6c}") (str.replace_re_all (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0) (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0)))))
+(assert (not (str.<= "\u{2f}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2f}\u{74}\u{79}\u{70}\u{65}\u{2f}\u{6e}\u{75}\u{6d}\u{65}\u{72}\u{69}\u{63}\u{61}\u{6c}\u{2f}\u{65}\u{64}\u{69}\u{74}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2e}\u{68}\u{74}\u{6d}\u{6c}" "\u{2f}\u{65}\u{76}\u{69}\u{6c}")))
+(set-info :status sat)
+(check-sat)
diff --git a/test/regress/regress2/strings/replace_re.smt2 b/test/regress/regress2/strings/replace_re.smt2

index 1f9b2a2ee84cd30de830cc1bbbdf5d18c4d21cba..a58314150b14f96d27562bf58c2a59dda2fc17c0 100644 (file)
--- a/test/regress/regress2/strings/replace_re.smt2
+++ b/test/regress/regress2/strings/replace_re.smt2
@@ -40,3 +40,11 @@
  (set-info :status unsat)
  (check-sat)
  (pop)
+
+(push)
+(assert (= "FOO" (str.replace_re x (re.union (str.to_re "A") (str.to_re "ABC")) "FOO")))
+(assert (not (= x "FOO")))
+(assert (> (str.len x) 1))
+(set-info :status unsat)
+(check-sat)
+(pop)
diff --git a/test/regress/regress3/strings/indexof_re_red.smt2 b/test/regress/regress3/strings/indexof_re_red.smt2

new file mode 100644 (file)

index 0000000..0e115cf
--- /dev/null
+++ b/test/regress/regress3/strings/indexof_re_red.smt2
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-const x String)
+(declare-const i Int)
+
+(assert (= i (str.indexof_re x (re.++ re.all (str.to_re "a")) 0)))
+(assert (not (or (= i 0) (= i (- 1)))))
+(set-info :status unsat)
+(check-sat)
author	Andres Noetzli <andres.noetzli@gmail.com>
	Thu, 27 May 2021 22:42:10 +0000 (15:42 -0700)
committer	GitHub <noreply@github.com>
	Thu, 27 May 2021 22:42:10 +0000 (22:42 +0000)
NEWS		patch \| blob \| history
src/api/cpp/cvc5.cpp		patch \| blob \| history
src/api/cpp/cvc5_kind.h		patch \| blob \| history
src/parser/smt2/smt2.cpp		patch \| blob \| history
src/printer/smt2/smt2_printer.cpp		patch \| blob \| history
src/theory/strings/extf_solver.cpp		patch \| blob \| history
src/theory/strings/kinds		patch \| blob \| history
src/theory/strings/rewrites.cpp		patch \| blob \| history
src/theory/strings/rewrites.h		patch \| blob \| history
src/theory/strings/sequences_rewriter.cpp		patch \| blob \| history
src/theory/strings/sequences_rewriter.h		patch \| blob \| history
src/theory/strings/skolem_cache.cpp		patch \| blob \| history
src/theory/strings/skolem_cache.h		patch \| blob \| history
src/theory/strings/term_registry.cpp		patch \| blob \| history
src/theory/strings/theory_strings.cpp		patch \| blob \| history
src/theory/strings/theory_strings_preprocess.cpp		patch \| blob \| history
src/theory/strings/theory_strings_utils.cpp		patch \| blob \| history
test/regress/CMakeLists.txt		patch \| blob \| history
test/regress/regress0/strings/indexof_re.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/indexof_re_red.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/issue6057-replace-re-all-jiwonparc.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/issue6057-replace-re.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/issue6203-6-replace-re.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/issue6337-replace-re-all.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/issue6337-replace-re.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress2/strings/issue6057-replace-re-all-simplified.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress2/strings/issue6057-replace-re-all.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress2/strings/replace_re.smt2		patch \| blob \| history
test/regress/regress3/strings/indexof_re_red.smt2	[new file with mode: 0644]	patch \| blob