Split sequences rewriter (#4194)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Fri, 3 Apr 2020 14:43:12 +0000 (09:43 -0500)

committer GitHub <noreply@github.com>

Fri, 3 Apr 2020 14:43:12 +0000 (09:43 -0500)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Fri, 3 Apr 2020 14:43:12 +0000 (09:43 -0500)
committer GitHub <noreply@github.com>
Fri, 3 Apr 2020 14:43:12 +0000 (09:43 -0500)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt

index c1c8c15e03efc2a689c2ad81c1dd9f8695dc3883..447ca90f8999e0d5e39535c0a5a2dc5ece71b2b7 100644 (file)
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -656,6 +656,8 @@ libcvc4_add_sources(
    theory/smt_engine_subsolver.h
    theory/sort_inference.cpp
    theory/sort_inference.h
+  theory/strings/arith_entail.cpp
+  theory/strings/arith_entail.h
    theory/strings/base_solver.cpp
    theory/strings/base_solver.h
    theory/strings/core_solver.cpp
@@ -672,6 +674,8 @@ libcvc4_add_sources(
    theory/strings/normal_form.h
    theory/strings/regexp_elim.cpp
    theory/strings/regexp_elim.h
+  theory/strings/regexp_entail.cpp
+  theory/strings/regexp_entail.h
    theory/strings/regexp_operation.cpp
    theory/strings/regexp_operation.h
    theory/strings/regexp_solver.cpp
@@ -686,6 +690,8 @@ libcvc4_add_sources(
    theory/strings/skolem_cache.h
    theory/strings/solver_state.cpp
    theory/strings/solver_state.h
+  theory/strings/strings_entail.cpp
+  theory/strings/strings_entail.h
    theory/strings/strings_fmf.cpp
    theory/strings/strings_fmf.h
    theory/strings/strings_rewriter.cpp
diff --git a/src/theory/strings/arith_entail.cpp b/src/theory/strings/arith_entail.cpp

new file mode 100644 (file)

index 0000000..5933f25
--- /dev/null
+++ b/src/theory/strings/arith_entail.cpp
@@ -0,0 +1,865 @@
+/*********************                                                        */
+/*! \file arith_entail.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds, Andres Noetzli
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of arithmetic entailment computation for string terms.
+ **/
+
+#include "theory/strings/arith_entail.h"
+
+#include "expr/attribute.h"
+#include "theory/arith/arith_msum.h"
+#include "theory/rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
+#include "theory/theory.h"
+
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+bool ArithEntail::checkEq(Node a, Node b)
+{
+  if (a == b)
+  {
+    return true;
+  }
+  Node ar = Rewriter::rewrite(a);
+  Node br = Rewriter::rewrite(b);
+  return ar == br;
+}
+
+bool ArithEntail::check(Node a, Node b, bool strict)
+{
+  if (a == b)
+  {
+    return !strict;
+  }
+  Node diff = NodeManager::currentNM()->mkNode(kind::MINUS, a, b);
+  return check(diff, strict);
+}
+
+struct StrCheckEntailArithTag
+{
+};
+struct StrCheckEntailArithComputedTag
+{
+};
+/** Attribute true for expressions for which check returned true */
+typedef expr::Attribute<StrCheckEntailArithTag, bool> StrCheckEntailArithAttr;
+typedef expr::Attribute<StrCheckEntailArithComputedTag, bool>
+    StrCheckEntailArithComputedAttr;
+
+bool ArithEntail::check(Node a, bool strict)
+{
+  if (a.isConst())
+  {
+    return a.getConst<Rational>().sgn() >= (strict ? 1 : 0);
+  }
+
+  Node ar =
+      strict
+          ? NodeManager::currentNM()->mkNode(
+                kind::MINUS, a, NodeManager::currentNM()->mkConst(Rational(1)))
+          : a;
+  ar = Rewriter::rewrite(ar);
+
+  if (ar.getAttribute(StrCheckEntailArithComputedAttr()))
+  {
+    return ar.getAttribute(StrCheckEntailArithAttr());
+  }
+
+  bool ret = checkInternal(ar);
+  if (!ret)
+  {
+    // try with approximations
+    ret = checkApprox(ar);
+  }
+  // cache the result
+  ar.setAttribute(StrCheckEntailArithAttr(), ret);
+  ar.setAttribute(StrCheckEntailArithComputedAttr(), true);
+  return ret;
+}
+
+bool ArithEntail::checkApprox(Node ar)
+{
+  Assert(Rewriter::rewrite(ar) == ar);
+  NodeManager* nm = NodeManager::currentNM();
+  std::map<Node, Node> msum;
+  Trace("strings-ent-approx-debug")
+      << "Setup arithmetic approximations for " << ar << std::endl;
+  if (!ArithMSum::getMonomialSum(ar, msum))
+  {
+    Trace("strings-ent-approx-debug")
+        << "...failed to get monomial sum!" << std::endl;
+    return false;
+  }
+  // for each monomial v*c, mApprox[v] a list of
+  // possibilities for how the term can be soundly approximated, that is,
+  // if mApprox[v] contains av, then v*c > av*c. Notice that if c
+  // is positive, then v > av, otherwise if c is negative, then v < av.
+  // In other words, av is an under-approximation if c is positive, and an
+  // over-approximation if c is negative.
+  bool changed = false;
+  std::map<Node, std::vector<Node> > mApprox;
+  // map from approximations to their monomial sums
+  std::map<Node, std::map<Node, Node> > approxMsums;
+  // aarSum stores each monomial that does not have multiple approximations
+  std::vector<Node> aarSum;
+  for (std::pair<const Node, Node>& m : msum)
+  {
+    Node v = m.first;
+    Node c = m.second;
+    Trace("strings-ent-approx-debug")
+        << "Get approximations " << v << "..." << std::endl;
+    if (v.isNull())
+    {
+      Node mn = c.isNull() ? nm->mkConst(Rational(1)) : c;
+      aarSum.push_back(mn);
+    }
+    else
+    {
+      // c.isNull() means c = 1
+      bool isOverApprox = !c.isNull() && c.getConst<Rational>().sgn() == -1;
+      std::vector<Node>& approx = mApprox[v];
+      std::unordered_set<Node, NodeHashFunction> visited;
+      std::vector<Node> toProcess;
+      toProcess.push_back(v);
+      do
+      {
+        Node curr = toProcess.back();
+        Trace("strings-ent-approx-debug") << "  process " << curr << std::endl;
+        curr = Rewriter::rewrite(curr);
+        toProcess.pop_back();
+        if (visited.find(curr) == visited.end())
+        {
+          visited.insert(curr);
+          std::vector<Node> currApprox;
+          getArithApproximations(curr, currApprox, isOverApprox);
+          if (currApprox.empty())
+          {
+            Trace("strings-ent-approx-debug")
+                << "...approximation: " << curr << std::endl;
+            // no approximations, thus curr is a possibility
+            approx.push_back(curr);
+          }
+          else
+          {
+            toProcess.insert(
+                toProcess.end(), currApprox.begin(), currApprox.end());
+          }
+        }
+      } while (!toProcess.empty());
+      Assert(!approx.empty());
+      // if we have only one approximation, move it to final
+      if (approx.size() == 1)
+      {
+        changed = v != approx[0];
+        Node mn = ArithMSum::mkCoeffTerm(c, approx[0]);
+        aarSum.push_back(mn);
+        mApprox.erase(v);
+      }
+      else
+      {
+        // compute monomial sum form for each approximation, used below
+        for (const Node& aa : approx)
+        {
+          if (approxMsums.find(aa) == approxMsums.end())
+          {
+            CVC4_UNUSED bool ret =
+                ArithMSum::getMonomialSum(aa, approxMsums[aa]);
+            Assert(ret);
+          }
+        }
+        changed = true;
+      }
+    }
+  }
+  if (!changed)
+  {
+    // approximations had no effect, return
+    Trace("strings-ent-approx-debug") << "...no approximations" << std::endl;
+    return false;
+  }
+  // get the current "fixed" sum for the abstraction of ar
+  Node aar = aarSum.empty()
+                 ? nm->mkConst(Rational(0))
+                 : (aarSum.size() == 1 ? aarSum[0] : nm->mkNode(PLUS, aarSum));
+  aar = Rewriter::rewrite(aar);
+  Trace("strings-ent-approx-debug")
+      << "...processed fixed sum " << aar << " with " << mApprox.size()
+      << " approximated monomials." << std::endl;
+  // if we have a choice of how to approximate
+  if (!mApprox.empty())
+  {
+    // convert aar back to monomial sum
+    std::map<Node, Node> msumAar;
+    if (!ArithMSum::getMonomialSum(aar, msumAar))
+    {
+      return false;
+    }
+    if (Trace.isOn("strings-ent-approx"))
+    {
+      Trace("strings-ent-approx")
+          << "---- Check arithmetic entailment by under-approximation " << ar
+          << " >= 0" << std::endl;
+      Trace("strings-ent-approx") << "FIXED:" << std::endl;
+      ArithMSum::debugPrintMonomialSum(msumAar, "strings-ent-approx");
+      Trace("strings-ent-approx") << "APPROX:" << std::endl;
+      for (std::pair<const Node, std::vector<Node> >& a : mApprox)
+      {
+        Node c = msum[a.first];
+        Trace("strings-ent-approx") << "  ";
+        if (!c.isNull())
+        {
+          Trace("strings-ent-approx") << c << " * ";
+        }
+        Trace("strings-ent-approx")
+            << a.second << " ...from " << a.first << std::endl;
+      }
+      Trace("strings-ent-approx") << std::endl;
+    }
+    Rational one(1);
+    // incorporate monomials one at a time that have a choice of approximations
+    while (!mApprox.empty())
+    {
+      Node v;
+      Node vapprox;
+      int maxScore = -1;
+      // Look at each approximation, take the one with the best score.
+      // Notice that we are in the process of trying to prove
+      // ( c1*t1 + .. + cn*tn ) + ( approx_1 | ... | approx_m ) >= 0,
+      // where c1*t1 + .. + cn*tn is the "fixed" component of our sum (aar)
+      // and approx_1 ... approx_m are possible approximations. The
+      // intution here is that we want coefficients c1...cn to be positive.
+      // This is because arithmetic string terms t1...tn (which may be
+      // applications of len, indexof, str.to.int) are never entailed to be
+      // negative. Hence, we add the approx_i that contributes the "most"
+      // towards making all constants c1...cn positive and cancelling negative
+      // monomials in approx_i itself.
+      for (std::pair<const Node, std::vector<Node> >& nam : mApprox)
+      {
+        Node cr = msum[nam.first];
+        for (const Node& aa : nam.second)
+        {
+          unsigned helpsCancelCount = 0;
+          unsigned addsObligationCount = 0;
+          std::map<Node, Node>::iterator it;
+          // we are processing an approximation cr*( c1*t1 + ... + cn*tn )
+          for (std::pair<const Node, Node>& aam : approxMsums[aa])
+          {
+            // Say aar is of the form t + c*ti, and aam is the monomial ci*ti
+            // where ci != 0. We say aam:
+            // (1) helps cancel if c != 0 and c>0 != ci>0
+            // (2) adds obligation if c>=0 and c+ci<0
+            Node ti = aam.first;
+            Node ci = aam.second;
+            if (!cr.isNull())
+            {
+              ci = ci.isNull() ? cr
+                               : Rewriter::rewrite(nm->mkNode(MULT, ci, cr));
+            }
+            Trace("strings-ent-approx-debug") << ci << "*" << ti << " ";
+            int ciSgn = ci.isNull() ? 1 : ci.getConst<Rational>().sgn();
+            it = msumAar.find(ti);
+            if (it != msumAar.end())
+            {
+              Node c = it->second;
+              int cSgn = c.isNull() ? 1 : c.getConst<Rational>().sgn();
+              if (cSgn == 0)
+              {
+                addsObligationCount += (ciSgn == -1 ? 1 : 0);
+              }
+              else if (cSgn != ciSgn)
+              {
+                helpsCancelCount++;
+                Rational r1 = c.isNull() ? one : c.getConst<Rational>();
+                Rational r2 = ci.isNull() ? one : ci.getConst<Rational>();
+                Rational r12 = r1 + r2;
+                if (r12.sgn() == -1)
+                {
+                  addsObligationCount++;
+                }
+              }
+            }
+            else
+            {
+              addsObligationCount += (ciSgn == -1 ? 1 : 0);
+            }
+          }
+          Trace("strings-ent-approx-debug")
+              << "counts=" << helpsCancelCount << "," << addsObligationCount
+              << " for " << aa << " into " << aar << std::endl;
+          int score = (addsObligationCount > 0 ? 0 : 2)
+                      + (helpsCancelCount > 0 ? 1 : 0);
+          // if its the best, update v and vapprox
+          if (v.isNull() || score > maxScore)
+          {
+            v = nam.first;
+            vapprox = aa;
+            maxScore = score;
+          }
+        }
+        if (!v.isNull())
+        {
+          break;
+        }
+      }
+      Trace("strings-ent-approx")
+          << "- Decide " << v << " = " << vapprox << std::endl;
+      // we incorporate v approximated by vapprox into the overall approximation
+      // for ar
+      Assert(!v.isNull() && !vapprox.isNull());
+      Assert(msum.find(v) != msum.end());
+      Node mn = ArithMSum::mkCoeffTerm(msum[v], vapprox);
+      aar = nm->mkNode(PLUS, aar, mn);
+      // update the msumAar map
+      aar = Rewriter::rewrite(aar);
+      msumAar.clear();
+      if (!ArithMSum::getMonomialSum(aar, msumAar))
+      {
+        Assert(false);
+        Trace("strings-ent-approx")
+            << "...failed to get monomial sum!" << std::endl;
+        return false;
+      }
+      // we have processed the approximation for v
+      mApprox.erase(v);
+    }
+    Trace("strings-ent-approx") << "-----------------" << std::endl;
+  }
+  if (aar == ar)
+  {
+    Trace("strings-ent-approx-debug")
+        << "...approximation had no effect" << std::endl;
+    // this should never happen, but we avoid the infinite loop for sanity here
+    Assert(false);
+    return false;
+  }
+  // Check entailment on the approximation of ar.
+  // Notice that this may trigger further reasoning by approximation. For
+  // example, len( replace( x ++ y, substr( x, 0, n ), z ) ) may be
+  // under-approximated as len( x ) + len( y ) - len( substr( x, 0, n ) ) on
+  // this call, where in the recursive call we may over-approximate
+  // len( substr( x, 0, n ) ) as len( x ). In this example, we can infer
+  // that len( replace( x ++ y, substr( x, 0, n ), z ) ) >= len( y ) in two
+  // steps.
+  if (check(aar))
+  {
+    Trace("strings-ent-approx")
+        << "*** StrArithApprox: showed " << ar
+        << " >= 0 using under-approximation!" << std::endl;
+    Trace("strings-ent-approx")
+        << "*** StrArithApprox: under-approximation was " << aar << std::endl;
+    return true;
+  }
+  return false;
+}
+
+void ArithEntail::getArithApproximations(Node a,
+                                         std::vector<Node>& approx,
+                                         bool isOverApprox)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  // We do not handle PLUS here since this leads to exponential behavior.
+  // Instead, this is managed, e.g. during checkApprox, where
+  // PLUS terms are expanded "on-demand" during the reasoning.
+  Trace("strings-ent-approx-debug")
+      << "Get arith approximations " << a << std::endl;
+  Kind ak = a.getKind();
+  if (ak == MULT)
+  {
+    Node c;
+    Node v;
+    if (ArithMSum::getMonomial(a, c, v))
+    {
+      bool isNeg = c.getConst<Rational>().sgn() == -1;
+      getArithApproximations(v, approx, isNeg ? !isOverApprox : isOverApprox);
+      for (unsigned i = 0, size = approx.size(); i < size; i++)
+      {
+        approx[i] = nm->mkNode(MULT, c, approx[i]);
+      }
+    }
+  }
+  else if (ak == STRING_LENGTH)
+  {
+    Kind aak = a[0].getKind();
+    if (aak == STRING_SUBSTR)
+    {
+      // over,under-approximations for len( substr( x, n, m ) )
+      Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]);
+      if (isOverApprox)
+      {
+        // m >= 0 implies
+        //  m >= len( substr( x, n, m ) )
+        if (check(a[0][2]))
+        {
+          approx.push_back(a[0][2]);
+        }
+        if (check(lenx, a[0][1]))
+        {
+          // n <= len( x ) implies
+          //   len( x ) - n >= len( substr( x, n, m ) )
+          approx.push_back(nm->mkNode(MINUS, lenx, a[0][1]));
+        }
+        else
+        {
+          // len( x ) >= len( substr( x, n, m ) )
+          approx.push_back(lenx);
+        }
+      }
+      else
+      {
+        // 0 <= n and n+m <= len( x ) implies
+        //   m <= len( substr( x, n, m ) )
+        Node npm = nm->mkNode(PLUS, a[0][1], a[0][2]);
+        if (check(a[0][1]) && check(lenx, npm))
+        {
+          approx.push_back(a[0][2]);
+        }
+        // 0 <= n and n+m >= len( x ) implies
+        //   len(x)-n <= len( substr( x, n, m ) )
+        if (check(a[0][1]) && check(npm, lenx))
+        {
+          approx.push_back(nm->mkNode(MINUS, lenx, a[0][1]));
+        }
+      }
+    }
+    else if (aak == STRING_STRREPL)
+    {
+      // over,under-approximations for len( replace( x, y, z ) )
+      // notice this is either len( x ) or ( len( x ) + len( z ) - len( y ) )
+      Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]);
+      Node leny = nm->mkNode(STRING_LENGTH, a[0][1]);
+      Node lenz = nm->mkNode(STRING_LENGTH, a[0][2]);
+      if (isOverApprox)
+      {
+        if (check(leny, lenz))
+        {
+          // len( y ) >= len( z ) implies
+          //   len( x ) >= len( replace( x, y, z ) )
+          approx.push_back(lenx);
+        }
+        else
+        {
+          // len( x ) + len( z ) >= len( replace( x, y, z ) )
+          approx.push_back(nm->mkNode(PLUS, lenx, lenz));
+        }
+      }
+      else
+      {
+        if (check(lenz, leny) || check(lenz, lenx))
+        {
+          // len( y ) <= len( z ) or len( x ) <= len( z ) implies
+          //   len( x ) <= len( replace( x, y, z ) )
+          approx.push_back(lenx);
+        }
+        else
+        {
+          // len( x ) - len( y ) <= len( replace( x, y, z ) )
+          approx.push_back(nm->mkNode(MINUS, lenx, leny));
+        }
+      }
+    }
+    else if (aak == STRING_ITOS)
+    {
+      // over,under-approximations for len( int.to.str( x ) )
+      if (isOverApprox)
+      {
+        if (check(a[0][0], false))
+        {
+          if (check(a[0][0], true))
+          {
+            // x > 0 implies
+            //   x >= len( int.to.str( x ) )
+            approx.push_back(a[0][0]);
+          }
+          else
+          {
+            // x >= 0 implies
+            //   x+1 >= len( int.to.str( x ) )
+            approx.push_back(
+                nm->mkNode(PLUS, nm->mkConst(Rational(1)), a[0][0]));
+          }
+        }
+      }
+      else
+      {
+        if (check(a[0][0]))
+        {
+          // x >= 0 implies
+          //   len( int.to.str( x ) ) >= 1
+          approx.push_back(nm->mkConst(Rational(1)));
+        }
+        // other crazy things are possible here, e.g.
+        // len( int.to.str( len( y ) + 10 ) ) >= 2
+      }
+    }
+  }
+  else if (ak == STRING_STRIDOF)
+  {
+    // over,under-approximations for indexof( x, y, n )
+    if (isOverApprox)
+    {
+      Node lenx = nm->mkNode(STRING_LENGTH, a[0]);
+      Node leny = nm->mkNode(STRING_LENGTH, a[1]);
+      if (check(lenx, leny))
+      {
+        // len( x ) >= len( y ) implies
+        //   len( x ) - len( y ) >= indexof( x, y, n )
+        approx.push_back(nm->mkNode(MINUS, lenx, leny));
+      }
+      else
+      {
+        // len( x ) >= indexof( x, y, n )
+        approx.push_back(lenx);
+      }
+    }
+    else
+    {
+      // TODO?:
+      // contains( substr( x, n, len( x ) ), y ) implies
+      //   n <= indexof( x, y, n )
+      // ...hard to test, runs risk of non-termination
+
+      // -1 <= indexof( x, y, n )
+      approx.push_back(nm->mkConst(Rational(-1)));
+    }
+  }
+  else if (ak == STRING_STOI)
+  {
+    // over,under-approximations for str.to.int( x )
+    if (isOverApprox)
+    {
+      // TODO?:
+      // y >= 0 implies
+      //   y >= str.to.int( int.to.str( y ) )
+    }
+    else
+    {
+      // -1 <= str.to.int( x )
+      approx.push_back(nm->mkConst(Rational(-1)));
+    }
+  }
+  Trace("strings-ent-approx-debug") << "Return " << approx.size() << std::endl;
+}
+
+bool ArithEntail::checkWithEqAssumption(Node assumption, Node a, bool strict)
+{
+  Assert(assumption.getKind() == kind::EQUAL);
+  Assert(Rewriter::rewrite(assumption) == assumption);
+
+  // Find candidates variables to compute substitutions for
+  std::unordered_set<Node, NodeHashFunction> candVars;
+  std::vector<Node> toVisit = {assumption};
+  while (!toVisit.empty())
+  {
+    Node curr = toVisit.back();
+    toVisit.pop_back();
+
+    if (curr.getKind() == kind::PLUS || curr.getKind() == kind::MULT
+        || curr.getKind() == kind::MINUS || curr.getKind() == kind::EQUAL)
+    {
+      for (const auto& currChild : curr)
+      {
+        toVisit.push_back(currChild);
+      }
+    }
+    else if (curr.isVar() && Theory::theoryOf(curr) == THEORY_ARITH)
+    {
+      candVars.insert(curr);
+    }
+    else if (curr.getKind() == kind::STRING_LENGTH)
+    {
+      candVars.insert(curr);
+    }
+  }
+
+  // Check if any of the candidate variables are in n
+  Node v;
+  Assert(toVisit.empty());
+  toVisit.push_back(a);
+  while (!toVisit.empty())
+  {
+    Node curr = toVisit.back();
+    toVisit.pop_back();
+
+    for (const auto& currChild : curr)
+    {
+      toVisit.push_back(currChild);
+    }
+
+    if (candVars.find(curr) != candVars.end())
+    {
+      v = curr;
+      break;
+    }
+  }
+
+  if (v.isNull())
+  {
+    // No suitable candidate found
+    return false;
+  }
+
+  Node solution = ArithMSum::solveEqualityFor(assumption, v);
+  if (solution.isNull())
+  {
+    // Could not solve for v
+    return false;
+  }
+
+  a = a.substitute(TNode(v), TNode(solution));
+  return check(a, strict);
+}
+
+bool ArithEntail::checkWithAssumption(Node assumption,
+                                      Node a,
+                                      Node b,
+                                      bool strict)
+{
+  Assert(Rewriter::rewrite(assumption) == assumption);
+
+  NodeManager* nm = NodeManager::currentNM();
+
+  if (!assumption.isConst() && assumption.getKind() != kind::EQUAL)
+  {
+    // We rewrite inequality assumptions from x <= y to x + (str.len s) = y
+    // where s is some fresh string variable. We use (str.len s) because
+    // (str.len s) must be non-negative for the equation to hold.
+    Node x, y;
+    if (assumption.getKind() == kind::GEQ)
+    {
+      x = assumption[0];
+      y = assumption[1];
+    }
+    else
+    {
+      // (not (>= s t)) --> (>= (t - 1) s)
+      Assert(assumption.getKind() == kind::NOT
+             && assumption[0].getKind() == kind::GEQ);
+      x = nm->mkNode(kind::MINUS, assumption[0][1], nm->mkConst(Rational(1)));
+      y = assumption[0][0];
+    }
+
+    Node s = nm->mkBoundVar("slackVal", nm->stringType());
+    Node slen = nm->mkNode(kind::STRING_LENGTH, s);
+    assumption = Rewriter::rewrite(
+        nm->mkNode(kind::EQUAL, x, nm->mkNode(kind::PLUS, y, slen)));
+  }
+
+  Node diff = nm->mkNode(kind::MINUS, a, b);
+  bool res = false;
+  if (assumption.isConst())
+  {
+    bool assumptionBool = assumption.getConst<bool>();
+    if (assumptionBool)
+    {
+      res = check(diff, strict);
+    }
+    else
+    {
+      res = true;
+    }
+  }
+  else
+  {
+    res = checkWithEqAssumption(assumption, diff, strict);
+  }
+  return res;
+}
+
+bool ArithEntail::checkWithAssumptions(std::vector<Node> assumptions,
+                                       Node a,
+                                       Node b,
+                                       bool strict)
+{
+  // TODO: We currently try to show the entailment with each assumption
+  // independently. In the future, we should make better use of multiple
+  // assumptions.
+  bool res = false;
+  for (const auto& assumption : assumptions)
+  {
+    Assert(Rewriter::rewrite(assumption) == assumption);
+
+    if (checkWithAssumption(assumption, a, b, strict))
+    {
+      res = true;
+      break;
+    }
+  }
+  return res;
+}
+
+Node ArithEntail::getConstantBound(Node a, bool isLower)
+{
+  Assert(Rewriter::rewrite(a) == a);
+  Node ret;
+  if (a.isConst())
+  {
+    ret = a;
+  }
+  else if (a.getKind() == kind::STRING_LENGTH)
+  {
+    if (isLower)
+    {
+      ret = NodeManager::currentNM()->mkConst(Rational(0));
+    }
+  }
+  else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT)
+  {
+    std::vector<Node> children;
+    bool success = true;
+    for (unsigned i = 0; i < a.getNumChildren(); i++)
+    {
+      Node ac = getConstantBound(a[i], isLower);
+      if (ac.isNull())
+      {
+        ret = ac;
+        success = false;
+        break;
+      }
+      else
+      {
+        if (ac.getConst<Rational>().sgn() == 0)
+        {
+          if (a.getKind() == kind::MULT)
+          {
+            ret = ac;
+            success = false;
+            break;
+          }
+        }
+        else
+        {
+          if (a.getKind() == kind::MULT)
+          {
+            if ((ac.getConst<Rational>().sgn() > 0) != isLower)
+            {
+              ret = Node::null();
+              success = false;
+              break;
+            }
+          }
+          children.push_back(ac);
+        }
+      }
+    }
+    if (success)
+    {
+      if (children.empty())
+      {
+        ret = NodeManager::currentNM()->mkConst(Rational(0));
+      }
+      else if (children.size() == 1)
+      {
+        ret = children[0];
+      }
+      else
+      {
+        ret = NodeManager::currentNM()->mkNode(a.getKind(), children);
+        ret = Rewriter::rewrite(ret);
+      }
+    }
+  }
+  Trace("strings-rewrite-cbound")
+      << "Constant " << (isLower ? "lower" : "upper") << " bound for " << a
+      << " is " << ret << std::endl;
+  Assert(ret.isNull() || ret.isConst());
+  // entailment check should be at least as powerful as computing a lower bound
+  Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() < 0
+         || check(a, false));
+  Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() <= 0
+         || check(a, true));
+  return ret;
+}
+
+bool ArithEntail::checkInternal(Node a)
+{
+  Assert(Rewriter::rewrite(a) == a);
+  // check whether a >= 0
+  if (a.isConst())
+  {
+    return a.getConst<Rational>().sgn() >= 0;
+  }
+  else if (a.getKind() == kind::STRING_LENGTH)
+  {
+    // str.len( t ) >= 0
+    return true;
+  }
+  else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT)
+  {
+    for (unsigned i = 0; i < a.getNumChildren(); i++)
+    {
+      if (!checkInternal(a[i]))
+      {
+        return false;
+      }
+    }
+    // t1 >= 0 ^ ... ^ tn >= 0 => t1 op ... op tn >= 0
+    return true;
+  }
+
+  return false;
+}
+
+bool ArithEntail::inferZerosInSumGeq(Node x,
+                                     std::vector<Node>& ys,
+                                     std::vector<Node>& zeroYs)
+{
+  Assert(zeroYs.empty());
+
+  NodeManager* nm = NodeManager::currentNM();
+
+  // Check if we can show that y1 + ... + yn >= x
+  Node sum = (ys.size() > 1) ? nm->mkNode(PLUS, ys) : ys[0];
+  if (!check(sum, x))
+  {
+    return false;
+  }
+
+  // Try to remove yi one-by-one and check if we can still show:
+  //
+  // y1 + ... + yi-1 +  yi+1 + ... + yn >= x
+  //
+  // If that's the case, we know that yi can be zero and the inequality still
+  // holds.
+  size_t i = 0;
+  while (i < ys.size())
+  {
+    Node yi = ys[i];
+    std::vector<Node>::iterator pos = ys.erase(ys.begin() + i);
+    if (ys.size() > 1)
+    {
+      sum = nm->mkNode(PLUS, ys);
+    }
+    else
+    {
+      sum = ys.size() == 1 ? ys[0] : nm->mkConst(Rational(0));
+    }
+
+    if (check(sum, x))
+    {
+      zeroYs.push_back(yi);
+    }
+    else
+    {
+      ys.insert(pos, yi);
+      i++;
+    }
+  }
+  return true;
+}
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
diff --git a/src/theory/strings/arith_entail.h b/src/theory/strings/arith_entail.h

new file mode 100644 (file)

index 0000000..266f726
--- /dev/null
+++ b/src/theory/strings/arith_entail.h
@@ -0,0 +1,180 @@
+/*********************                                                        */
+/*! \file arith_entail.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds, Andres Noetzli
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Arithmetic entailment computation for string terms.
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__STRINGS__ARITH_ENTAIL_H
+#define CVC4__THEORY__STRINGS__ARITH_ENTAIL_H
+
+#include <vector>
+
+#include "expr/node.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+/**
+ * Techniques for computing arithmetic entailment for string terms. This
+ * is an implementation of the techniques from Reynolds et al, "High Level
+ * Abstractions for Simplifying Extended String Constraints in SMT", CAV 2019.
+ */
+class ArithEntail
+{
+ public:
+  /** check arithmetic entailment equal
+   * Returns true if it is always the case that a = b.
+   */
+  static bool checkEq(Node a, Node b);
+  /** check arithmetic entailment
+   * Returns true if it is always the case that a >= b,
+   * and a>b if strict is true.
+   */
+  static bool check(Node a, Node b, bool strict = false);
+  /** check arithmetic entailment
+   * Returns true if it is always the case that a >= 0.
+   */
+  static bool check(Node a, bool strict = false);
+  /** check arithmetic entailment with approximations
+   *
+   * Returns true if it is always the case that a >= 0. We expect that a is in
+   * rewritten form.
+   *
+   * This function uses "approximation" techniques that under-approximate
+   * the value of a for the purposes of showing the entailment holds. For
+   * example, given:
+   *   len( x ) - len( substr( y, 0, len( x ) ) )
+   * Since we know that len( substr( y, 0, len( x ) ) ) <= len( x ), the above
+   * term can be under-approximated as len( x ) - len( x ) = 0, which is >= 0,
+   * and thus the entailment len( x ) - len( substr( y, 0, len( x ) ) ) >= 0
+   * holds.
+   */
+  static bool checkApprox(Node a);
+
+  /**
+   * Checks whether assumption |= a >= 0 (if strict is false) or
+   * assumption |= a > 0 (if strict is true), where assumption is an equality
+   * assumption. The assumption must be in rewritten form.
+   *
+   * Example:
+   *
+   * checkWithEqAssumption(x + (str.len y) = 0, -x, false) = true
+   *
+   * Because: x = -(str.len y), so -x >= 0 --> (str.len y) >= 0 --> true
+   */
+  static bool checkWithEqAssumption(Node assumption,
+                                    Node a,
+                                    bool strict = false);
+
+  /**
+   * Checks whether assumption |= a >= b (if strict is false) or
+   * assumption |= a > b (if strict is true). The function returns true if it
+   * can be shown that the entailment holds and false otherwise. Assumption
+   * must be in rewritten form. Assumption may be an equality or an inequality.
+   *
+   * Example:
+   *
+   * checkWithAssumption(x + (str.len y) = 0, 0, x, false) = true
+   *
+   * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true
+   */
+  static bool checkWithAssumption(Node assumption,
+                                  Node a,
+                                  Node b,
+                                  bool strict = false);
+
+  /**
+   * Checks whether assumptions |= a >= b (if strict is false) or
+   * assumptions |= a > b (if strict is true). The function returns true if it
+   * can be shown that the entailment holds and false otherwise. Assumptions
+   * must be in rewritten form. Assumptions may be an equalities or an
+   * inequalities.
+   *
+   * Example:
+   *
+   * checkWithAssumptions([x + (str.len y) = 0], 0, x, false) = true
+   *
+   * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true
+   */
+  static bool checkWithAssumptions(std::vector<Node> assumptions,
+                                   Node a,
+                                   Node b,
+                                   bool strict = false);
+
+  /** get arithmetic lower bound
+   * If this function returns a non-null Node ret,
+   * then ret is a rational constant and
+   * we know that n >= ret always if isLower is true,
+   * or n <= ret if isLower is false.
+   *
+   * Notice the following invariant.
+   * If getConstantArithBound(a, true) = ret where ret is non-null, then for
+   * strict = { true, false } :
+   *   ret >= strict ? 1 : 0
+   *     if and only if
+   *   check( a, strict ) = true.
+   */
+  static Node getConstantBound(Node a, bool isLower = true);
+
+  /**
+   * Given an inequality y1 + ... + yn >= x, removes operands yi s.t. the
+   * original inequality still holds. Returns true if the original inequality
+   * holds and false otherwise. The list of ys is modified to contain a subset
+   * of the original ys.
+   *
+   * Example:
+   *
+   * inferZerosInSumGeq( (str.len x), [ (str.len x), (str.len y), 1 ], [] )
+   * --> returns true with ys = [ (str.len x) ] and zeroYs = [ (str.len y), 1 ]
+   *     (can be used to rewrite the inequality to false)
+   *
+   * inferZerosInSumGeq( (str.len x), [ (str.len y) ], [] )
+   * --> returns false because it is not possible to show
+   *     str.len(y) >= str.len(x)
+   */
+  static bool inferZerosInSumGeq(Node x,
+                                 std::vector<Node>& ys,
+                                 std::vector<Node>& zeroYs);
+
+ private:
+  /** check entail arithmetic internal
+   * Returns true if we can show a >= 0 always.
+   * a is in rewritten form.
+   */
+  static bool checkInternal(Node a);
+  /** Get arithmetic approximations
+   *
+   * This gets the (set of) arithmetic approximations for term a and stores
+   * them in approx. If isOverApprox is true, these are over-approximations
+   * for the value of a, otherwise, they are underapproximations. For example,
+   * an over-approximation for len( substr( y, n, m ) ) is m; an
+   * under-approximation for indexof( x, y, n ) is -1.
+   *
+   * Notice that this function is not generally recursive (although it may make
+   * a small bounded of recursive calls). Instead, it returns the shape
+   * of the approximations for a. For example, an under-approximation
+   * for the term len( replace( substr( x, 0, n ), y, z ) ) returned by this
+   * function might be len( substr( x, 0, n ) ) - len( y ), where we don't
+   * consider (recursively) the approximations for len( substr( x, 0, n ) ).
+   */
+  static void getArithApproximations(Node a,
+                                     std::vector<Node>& approx,
+                                     bool isOverApprox = false);
+};
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
+
+#endif
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp

index 3384499a225a627da1589e345e2f3d9f7f6843e7..dc076e73475a723cb93fda78a6493d5198444fbc 100644 (file)
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -18,6 +18,7 @@
  
  #include "options/strings_options.h"
  #include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/strings_entail.h"
  #include "theory/strings/theory_strings_utils.h"
  #include "theory/strings/word.h"
  
@@ -154,7 +155,7 @@ void CoreSolver::checkFlatForms()
          for (const Node& n : it->second)
          {
            int firstc, lastc;
-          if (!SequencesRewriter::canConstantContainList(
+          if (!StringsEntail::canConstantContainList(
                    c, d_flat_form[n], firstc, lastc))
            {
              Trace("strings-ff-debug") << "Flat form for " << n
@@ -348,9 +349,8 @@ void CoreSolver::checkFlatForm(std::vector<Node>& eqc,
              if (!curr_c.isNull() && !cc_c.isNull())
              {
                // check for constant conflict
-              int index;
-              Node s =
-                  SequencesRewriter::splitConstant(cc_c, curr_c, index, isRev);
+              size_t index;
+              Node s = Word::splitConstant(cc_c, curr_c, index, isRev);
                if (s.isNull())
                {
                  d_bsolver.explainConstantEqc(ac,curr,exp);
@@ -915,8 +915,7 @@ void CoreSolver::getNormalForms(Node eqc,
        {
          NormalForm& nf = normal_forms[i];
          int firstc, lastc;
-        if (!SequencesRewriter::canConstantContainList(
-                c, nf.d_nf, firstc, lastc))
+        if (!StringsEntail::canConstantContainList(c, nf.d_nf, firstc, lastc))
          {
            Node n = nf.d_base;
            //conflict
@@ -1918,7 +1917,7 @@ int CoreSolver::processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >&
        if (!c.isNull())
        {
          int findex, lindex;
-        if (!SequencesRewriter::canConstantContainList(
+        if (!StringsEntail::canConstantContainList(
                  c, i == 0 ? nfj : nfi, findex, lindex))
          {
            Trace("strings-solve-debug")
diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp

index 259588789f69cc64c03031d664a1f7b033f0d0df..d4f301e23c52192115a8f00e478159bd076f1c72 100644 (file)
--- a/src/theory/strings/regexp_elim.cpp
+++ b/src/theory/strings/regexp_elim.cpp
@@ -17,7 +17,7 @@
  
  #include "options/strings_options.h"
  #include "theory/rewriter.h"
-#include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/regexp_entail.h"
  #include "theory/strings/theory_strings_utils.h"
  
  using namespace CVC4;
@@ -71,7 +71,7 @@ Node RegExpElimination::eliminateConcat(Node atom)
    for (unsigned i = 0, size = children.size(); i < size; i++)
    {
      Node c = children[i];
-    Node fl = SequencesRewriter::getFixedLengthForRegexp(c);
+    Node fl = RegExpEntail::getFixedLengthForRegexp(c);
      if (fl.isNull())
      {
        if (!hasPivotIndex && c.getKind() == REGEXP_STAR
diff --git a/src/theory/strings/regexp_entail.cpp b/src/theory/strings/regexp_entail.cpp

new file mode 100644 (file)

index 0000000..d038934
--- /dev/null
+++ b/src/theory/strings/regexp_entail.cpp
@@ -0,0 +1,623 @@
+/*********************                                                        */
+/*! \file regexp_entail.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds, Andres Noetzli, Tianyi Liang
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of entailment tests involving regular expressions
+ **/
+
+#include "theory/strings/regexp_entail.h"
+
+#include "theory/rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
+
+using namespace std;
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
+                                       std::vector<Node>& children,
+                                       int dir)
+{
+  Trace("regexp-ext-rewrite-debug")
+      << "Simple reg exp consume, dir=" << dir << ":" << std::endl;
+  Trace("regexp-ext-rewrite-debug")
+      << "  mchildren : " << mchildren << std::endl;
+  Trace("regexp-ext-rewrite-debug") << "  children : " << children << std::endl;
+  NodeManager* nm = NodeManager::currentNM();
+  unsigned tmin = dir < 0 ? 0 : dir;
+  unsigned tmax = dir < 0 ? 1 : dir;
+  // try to remove off front and back
+  for (unsigned t = 0; t < 2; t++)
+  {
+    if (tmin <= t && t <= tmax)
+    {
+      bool do_next = true;
+      while (!children.empty() && !mchildren.empty() && do_next)
+      {
+        do_next = false;
+        Node xc = mchildren[mchildren.size() - 1];
+        Node rc = children[children.size() - 1];
+        Assert(rc.getKind() != REGEXP_CONCAT);
+        Assert(xc.getKind() != STRING_CONCAT);
+        if (rc.getKind() == STRING_TO_REGEXP)
+        {
+          if (xc == rc[0])
+          {
+            children.pop_back();
+            mchildren.pop_back();
+            do_next = true;
+            Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl;
+          }
+          else if (xc.isConst() && rc[0].isConst())
+          {
+            // split the constant
+            size_t index;
+            Node s = Word::splitConstant(xc, rc[0], index, t == 0);
+            Trace("regexp-ext-rewrite-debug")
+                << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> "
+                << s << " " << index << " " << t << std::endl;
+            if (s.isNull())
+            {
+              Trace("regexp-ext-rewrite-debug")
+                  << "...return false" << std::endl;
+              return nm->mkConst(false);
+            }
+            else
+            {
+              Trace("regexp-ext-rewrite-debug")
+                  << "...strip equal const" << std::endl;
+              children.pop_back();
+              mchildren.pop_back();
+              if (index == 0)
+              {
+                mchildren.push_back(s);
+              }
+              else
+              {
+                children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
+              }
+            }
+            do_next = true;
+          }
+        }
+        else if (xc.isConst())
+        {
+          // check for constants
+          CVC4::String s = xc.getConst<String>();
+          if (Word::isEmpty(xc))
+          {
+            Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl;
+            // ignore and continue
+            mchildren.pop_back();
+            do_next = true;
+          }
+          else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA)
+          {
+            std::vector<unsigned> ssVec;
+            ssVec.push_back(t == 0 ? s.back() : s.front());
+            CVC4::String ss(ssVec);
+            if (testConstStringInRegExp(ss, 0, rc))
+            {
+              // strip off one character
+              mchildren.pop_back();
+              if (s.size() > 1)
+              {
+                if (t == 0)
+                {
+                  mchildren.push_back(nm->mkConst(s.substr(0, s.size() - 1)));
+                }
+                else
+                {
+                  mchildren.push_back(nm->mkConst(s.substr(1)));
+                }
+              }
+              children.pop_back();
+              do_next = true;
+            }
+            else
+            {
+              return nm->mkConst(false);
+            }
+          }
+          else if (rc.getKind() == REGEXP_INTER || rc.getKind() == REGEXP_UNION)
+          {
+            // see if any/each child does not work
+            bool result_valid = true;
+            Node result;
+            Node emp_s = nm->mkConst(::CVC4::String(""));
+            for (unsigned i = 0; i < rc.getNumChildren(); i++)
+            {
+              std::vector<Node> mchildren_s;
+              std::vector<Node> children_s;
+              mchildren_s.push_back(xc);
+              utils::getConcat(rc[i], children_s);
+              Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
+              if (!ret.isNull())
+              {
+                // one conjunct cannot be satisfied, return false
+                if (rc.getKind() == REGEXP_INTER)
+                {
+                  return ret;
+                }
+              }
+              else
+              {
+                if (children_s.empty())
+                {
+                  // if we were able to fully consume, store the result
+                  Assert(mchildren_s.size() <= 1);
+                  if (mchildren_s.empty())
+                  {
+                    mchildren_s.push_back(emp_s);
+                  }
+                  if (result.isNull())
+                  {
+                    result = mchildren_s[0];
+                  }
+                  else if (result != mchildren_s[0])
+                  {
+                    result_valid = false;
+                  }
+                }
+                else
+                {
+                  result_valid = false;
+                }
+              }
+            }
+            if (result_valid)
+            {
+              if (result.isNull())
+              {
+                // all disjuncts cannot be satisfied, return false
+                Assert(rc.getKind() == REGEXP_UNION);
+                return nm->mkConst(false);
+              }
+              else
+              {
+                // all branches led to the same result
+                children.pop_back();
+                mchildren.pop_back();
+                if (result != emp_s)
+                {
+                  mchildren.push_back(result);
+                }
+                do_next = true;
+              }
+            }
+          }
+          else if (rc.getKind() == REGEXP_STAR)
+          {
+            // check if there is no way that this star can be unrolled even once
+            std::vector<Node> mchildren_s;
+            mchildren_s.insert(
+                mchildren_s.end(), mchildren.begin(), mchildren.end());
+            if (t == 1)
+            {
+              std::reverse(mchildren_s.begin(), mchildren_s.end());
+            }
+            std::vector<Node> children_s;
+            utils::getConcat(rc[0], children_s);
+            Trace("regexp-ext-rewrite-debug")
+                << "...recursive call on body of star" << std::endl;
+            Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
+            if (!ret.isNull())
+            {
+              Trace("regexp-ext-rewrite-debug")
+                  << "CRE : regexp star infeasable " << xc << " " << rc
+                  << std::endl;
+              children.pop_back();
+              if (!children.empty())
+              {
+                Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl;
+                do_next = true;
+              }
+            }
+            else
+            {
+              if (children_s.empty())
+              {
+                // check if beyond this, we can't do it or there is nothing
+                // left, if so, repeat
+                bool can_skip = false;
+                if (children.size() > 1)
+                {
+                  std::vector<Node> mchildren_ss;
+                  mchildren_ss.insert(
+                      mchildren_ss.end(), mchildren.begin(), mchildren.end());
+                  std::vector<Node> children_ss;
+                  children_ss.insert(
+                      children_ss.end(), children.begin(), children.end() - 1);
+                  if (t == 1)
+                  {
+                    std::reverse(mchildren_ss.begin(), mchildren_ss.end());
+                    std::reverse(children_ss.begin(), children_ss.end());
+                  }
+                  if (simpleRegexpConsume(mchildren_ss, children_ss, t)
+                          .isNull())
+                  {
+                    can_skip = true;
+                  }
+                }
+                if (!can_skip)
+                {
+                  Trace("regexp-ext-rewrite-debug")
+                      << "...can't skip" << std::endl;
+                  // take the result of fully consuming once
+                  if (t == 1)
+                  {
+                    std::reverse(mchildren_s.begin(), mchildren_s.end());
+                  }
+                  mchildren.clear();
+                  mchildren.insert(
+                      mchildren.end(), mchildren_s.begin(), mchildren_s.end());
+                  do_next = true;
+                }
+                else
+                {
+                  Trace("regexp-ext-rewrite-debug")
+                      << "...can skip " << rc << " from " << xc << std::endl;
+                }
+              }
+            }
+          }
+        }
+        if (!do_next)
+        {
+          Trace("regexp-ext-rewrite")
+              << "Cannot consume : " << xc << " " << rc << std::endl;
+        }
+      }
+    }
+    if (dir != 0)
+    {
+      std::reverse(children.begin(), children.end());
+      std::reverse(mchildren.begin(), mchildren.end());
+    }
+  }
+  return Node::null();
+}
+
+bool RegExpEntail::isConstRegExp(TNode t)
+{
+  if (t.getKind() == STRING_TO_REGEXP)
+  {
+    return t[0].isConst();
+  }
+  else if (t.isVar())
+  {
+    return false;
+  }
+  for (unsigned i = 0; i < t.getNumChildren(); ++i)
+  {
+    if (!isConstRegExp(t[i]))
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool RegExpEntail::testConstStringInRegExp(CVC4::String& s,
+                                           unsigned index_start,
+                                           TNode r)
+{
+  Assert(index_start <= s.size());
+  Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at "
+                        << index_start << std::endl;
+  Assert(!r.isVar());
+  Kind k = r.getKind();
+  switch (k)
+  {
+    case STRING_TO_REGEXP:
+    {
+      CVC4::String s2 = s.substr(index_start, s.size() - index_start);
+      if (r[0].isConst())
+      {
+        return (s2 == r[0].getConst<String>());
+      }
+      Assert(false) << "RegExp contains variables";
+      return false;
+    }
+    case REGEXP_CONCAT:
+    {
+      if (s.size() != index_start)
+      {
+        std::vector<int> vec_k(r.getNumChildren(), -1);
+        int start = 0;
+        int left = (int)s.size() - index_start;
+        int i = 0;
+        while (i < (int)r.getNumChildren())
+        {
+          bool flag = true;
+          if (i == (int)r.getNumChildren() - 1)
+          {
+            if (testConstStringInRegExp(s, index_start + start, r[i]))
+            {
+              return true;
+            }
+          }
+          else if (i == -1)
+          {
+            return false;
+          }
+          else
+          {
+            for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i])
+            {
+              CVC4::String t = s.substr(index_start + start, vec_k[i]);
+              if (testConstStringInRegExp(t, 0, r[i]))
+              {
+                start += vec_k[i];
+                left -= vec_k[i];
+                flag = false;
+                ++i;
+                vec_k[i] = -1;
+                break;
+              }
+            }
+          }
+
+          if (flag)
+          {
+            --i;
+            if (i >= 0)
+            {
+              start -= vec_k[i];
+              left += vec_k[i];
+            }
+          }
+        }
+        return false;
+      }
+      else
+      {
+        for (unsigned i = 0; i < r.getNumChildren(); ++i)
+        {
+          if (!testConstStringInRegExp(s, index_start, r[i]))
+          {
+            return false;
+          }
+        }
+        return true;
+      }
+    }
+    case REGEXP_UNION:
+    {
+      for (unsigned i = 0; i < r.getNumChildren(); ++i)
+      {
+        if (testConstStringInRegExp(s, index_start, r[i]))
+        {
+          return true;
+        }
+      }
+      return false;
+    }
+    case REGEXP_INTER:
+    {
+      for (unsigned i = 0; i < r.getNumChildren(); ++i)
+      {
+        if (!testConstStringInRegExp(s, index_start, r[i]))
+        {
+          return false;
+        }
+      }
+      return true;
+    }
+    case REGEXP_STAR:
+    {
+      if (s.size() != index_start)
+      {
+        for (unsigned i = s.size() - index_start; i > 0; --i)
+        {
+          CVC4::String t = s.substr(index_start, i);
+          if (testConstStringInRegExp(t, 0, r[0]))
+          {
+            if (index_start + i == s.size()
+                || testConstStringInRegExp(s, index_start + i, r))
+            {
+              return true;
+            }
+          }
+        }
+        return false;
+      }
+      else
+      {
+        return true;
+      }
+    }
+    case REGEXP_EMPTY: { return false;
+    }
+    case REGEXP_SIGMA:
+    {
+      if (s.size() == index_start + 1)
+      {
+        return true;
+      }
+      else
+      {
+        return false;
+      }
+    }
+    case REGEXP_RANGE:
+    {
+      if (s.size() == index_start + 1)
+      {
+        unsigned a = r[0].getConst<String>().front();
+        unsigned b = r[1].getConst<String>().front();
+        unsigned c = s.back();
+        return (a <= c && c <= b);
+      }
+      else
+      {
+        return false;
+      }
+    }
+    case REGEXP_LOOP:
+    {
+      NodeManager* nm = NodeManager::currentNM();
+      uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
+      if (s.size() == index_start)
+      {
+        return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]);
+      }
+      else if (l == 0 && r[1] == r[2])
+      {
+        return false;
+      }
+      else
+      {
+        Assert(r.getNumChildren() == 3)
+            << "String rewriter error: LOOP has 2 children";
+        if (l == 0)
+        {
+          // R{0,u}
+          uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
+          for (unsigned len = s.size() - index_start; len >= 1; len--)
+          {
+            CVC4::String t = s.substr(index_start, len);
+            if (testConstStringInRegExp(t, 0, r[0]))
+            {
+              if (len + index_start == s.size())
+              {
+                return true;
+              }
+              else
+              {
+                Node num2 = nm->mkConst(CVC4::Rational(u - 1));
+                Node r2 = nm->mkNode(REGEXP_LOOP, r[0], r[1], num2);
+                if (testConstStringInRegExp(s, index_start + len, r2))
+                {
+                  return true;
+                }
+              }
+            }
+          }
+          return false;
+        }
+        else
+        {
+          // R{l,l}
+          Assert(r[1] == r[2])
+              << "String rewriter error: LOOP nums are not equal";
+          if (l > s.size() - index_start)
+          {
+            if (testConstStringInRegExp(s, s.size(), r[0]))
+            {
+              l = s.size() - index_start;
+            }
+            else
+            {
+              return false;
+            }
+          }
+          for (unsigned len = 1; len <= s.size() - index_start; len++)
+          {
+            CVC4::String t = s.substr(index_start, len);
+            if (testConstStringInRegExp(t, 0, r[0]))
+            {
+              Node num2 = nm->mkConst(CVC4::Rational(l - 1));
+              Node r2 = nm->mkNode(REGEXP_LOOP, r[0], num2, num2);
+              if (testConstStringInRegExp(s, index_start + len, r2))
+              {
+                return true;
+              }
+            }
+          }
+          return false;
+        }
+      }
+    }
+    case REGEXP_COMPLEMENT:
+    {
+      return !testConstStringInRegExp(s, index_start, r[0]);
+      break;
+    }
+    default:
+    {
+      Assert(!utils::isRegExpKind(k));
+      return false;
+    }
+  }
+}
+
+bool RegExpEntail::hasEpsilonNode(TNode node)
+{
+  for (const Node& nc : node)
+  {
+    if (nc.getKind() == STRING_TO_REGEXP && Word::isEmpty(nc[0]))
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+Node RegExpEntail::getFixedLengthForRegexp(Node n)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  if (n.getKind() == STRING_TO_REGEXP)
+  {
+    Node ret = nm->mkNode(STRING_LENGTH, n[0]);
+    ret = Rewriter::rewrite(ret);
+    if (ret.isConst())
+    {
+      return ret;
+    }
+  }
+  else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
+  {
+    return nm->mkConst(Rational(1));
+  }
+  else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
+  {
+    Node ret;
+    for (const Node& nc : n)
+    {
+      Node flc = getFixedLengthForRegexp(nc);
+      if (flc.isNull() || (!ret.isNull() && ret != flc))
+      {
+        return Node::null();
+      }
+      else if (ret.isNull())
+      {
+        // first time
+        ret = flc;
+      }
+    }
+    return ret;
+  }
+  else if (n.getKind() == REGEXP_CONCAT)
+  {
+    NodeBuilder<> nb(PLUS);
+    for (const Node& nc : n)
+    {
+      Node flc = getFixedLengthForRegexp(nc);
+      if (flc.isNull())
+      {
+        return flc;
+      }
+      nb << flc;
+    }
+    Node ret = nb.constructNode();
+    ret = Rewriter::rewrite(ret);
+    return ret;
+  }
+  return Node::null();
+}
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
diff --git a/src/theory/strings/regexp_entail.h b/src/theory/strings/regexp_entail.h

new file mode 100644 (file)

index 0000000..0732a67
--- /dev/null
+++ b/src/theory/strings/regexp_entail.h
@@ -0,0 +1,117 @@
+/*********************                                                        */
+/*! \file regexp_entail.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds, Andres Noetzli, Tianyi Liang
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Entailment tests involving regular expressions
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__STRINGS__REGEXP_ENTAIL_H
+#define CVC4__THEORY__STRINGS__REGEXP_ENTAIL_H
+
+#include <climits>
+#include <utility>
+#include <vector>
+
+#include "expr/attribute.h"
+#include "theory/strings/rewrites.h"
+#include "theory/theory_rewriter.h"
+#include "theory/type_enumerator.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+class RegExpEntail
+{
+ public:
+  /** simple regular expression consume
+   *
+   * This method is called when we are rewriting a membership of the form
+   *   s1 ++ ... ++ sn in r1 ++ ... ++ rm
+   * We have that mchildren consists of the strings s1...sn, and children
+   * consists of the regular expressions r1...rm.
+   *
+   * This method tries to strip off parts of the concatenation terms. It updates
+   * the vectors such that the resulting vectors are such that the membership
+   * mchildren[n'...n''] in children[m'...m''] is equivalent to the input
+   * membership. The argument dir indicates the direction to consider, where
+   * 0 means strip off the front, 1 off the back, and < 0 off of both.
+   *
+   * If this method returns the false node, then we have inferred that no
+   * string in the language of r1 ++ ... ++ rm is a prefix (when dir!=1) or
+   * suffix (when dir!=0) of s1 ++ ... ++ sn. Otherwise, it returns the null
+   * node.
+   *
+   * For example, given input
+   *   mchildren = { "ab", x }, children = { [["a"]], ([["cd"]])* } and dir = 0,
+   * this method updates:
+   *   mchildren = { "b", x }, children = { ("cd")* }
+   * and returns null.
+   *
+   * For example, given input
+   *   { x, "abb", x }, { [[x]], ["a"..."b"], allchar, [[y]], [[x]]} and dir=-1,
+   * this method updates:
+   *   { "b" }, { [[y]] }
+   * where [[.]] denotes str.to.re, and returns null.
+   *
+   * Notice that the above requirement for returning false is stronger than
+   * determining that s1 ++ ... ++ sn in r1 ++ ... ++ rm is equivalent to false.
+   * For example, for input "bb" in "b" ++ ( "a" )*, we do not return false
+   * since "b" is in the language of "b" ++ ( "a" )* and is a prefix of "bb".
+   * We do not return false even though the above membership is equivalent
+   * to false. We do this because the function is used e.g. to test whether a
+   * possible unrolling leads to a conflict. This is demonstrated by the
+   * following examples:
+   *
+   * For example, given input
+   *   { "bb", x }, { "b", ("a")* } and dir=-1,
+   * this method updates:
+   *   { "b" }, { ("a")* }
+   * and returns null.
+   *
+   * For example, given input
+   *   { "cb", x }, { "b", ("a")* } and dir=-1,
+   * this method leaves children and mchildren unchanged and returns false.
+   *
+   * Notice that based on this, we can determine that:
+   *   "cb" ++ x  in ( "b" ++ ("a")* )*
+   * is equivalent to false, whereas we cannot determine that:
+   *   "bb" ++ x  in ( "b" ++ ("a")* )*
+   * is equivalent to false.
+   */
+  static Node simpleRegexpConsume(std::vector<Node>& mchildren,
+                                  std::vector<Node>& children,
+                                  int dir = -1);
+  /** Is t a constant regular expression? */
+  static bool isConstRegExp(TNode t);
+  /**
+   * Does the substring of s starting at index_start occur in constant regular
+   * expression r?
+   */
+  static bool testConstStringInRegExp(CVC4::String& s,
+                                      unsigned index_start,
+                                      TNode r);
+  /** Does regular expression node have (str.to.re "") as a child? */
+  static bool hasEpsilonNode(TNode node);
+  /** get length for regular expression
+   *
+   * Given regular expression n, if this method returns a non-null value c, then
+   * x in n entails len( x ) = c.
+   */
+  static Node getFixedLengthForRegexp(Node n);
+};
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
+
+#endif /* CVC4__THEORY__STRINGS__REGEXP_ENTAIL_H */
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp

index d104f3ade94312205a5da9c2d2b9b10b1b0c0475..0520ec88ac858eb048aef5c911ef902cb5961c5a 100644 (file)
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -18,7 +18,7 @@
  
  #include "expr/kind.h"
  #include "options/strings_options.h"
-#include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/regexp_entail.h"
  #include "theory/strings/theory_strings_utils.h"
  #include "theory/strings/word.h"
  
@@ -83,7 +83,7 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r)
        {
          d_constCache[cur] = RE_C_CONSTANT;
        }
-      else if (!isRegExpKind(ck))
+      else if (!utils::isRegExpKind(ck))
        {
          // non-regular expression applications, e.g. function applications
          // with regular expression return type are treated as variables.
@@ -115,15 +115,6 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r)
    return d_constCache[r];
  }
  
-bool RegExpOpr::isRegExpKind(Kind k)
-{
-  return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
-         || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
-         || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
-         || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
-         || k == REGEXP_COMPLEMENT;
-}
-
  // 0-unknown, 1-yes, 2-no
  int RegExpOpr::delta( Node r, Node &exp ) {
    Trace("regexp-delta") << "RegExp-Delta starts with /" << mkString( r ) << "/" << std::endl;
@@ -270,7 +261,7 @@ int RegExpOpr::delta( Node r, Node &exp ) {
          break;
        }
        default: {
-        Assert(!isRegExpKind(k));
+        Assert(!utils::isRegExpKind(k));
          break;
        }
      }
@@ -521,7 +512,7 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
          break;
        }
        default: {
-        Assert(!isRegExpKind(r.getKind()));
+        Assert(!utils::isRegExpKind(r.getKind()));
          return 0;
          break;
        }
@@ -808,7 +799,7 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
          // aren't a standard regular expression kind. However, if we do, then
          // the following code is conservative and says that the current
          // regular expression can begin with any character.
-        Assert(isRegExpKind(k));
+        Assert(utils::isRegExpKind(k));
          // can start with any character
          Assert(d_lastchar < std::numeric_limits<unsigned>::max());
          for (unsigned i = 0; i <= d_lastchar; i++)
@@ -908,12 +899,12 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
          // all strings in the language of R1 have the same length, say n,
          // then the conclusion of the reduction is quantifier-free:
          //    ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
-        Node reLength = SequencesRewriter::getFixedLengthForRegexp(r[0]);
+        Node reLength = RegExpEntail::getFixedLengthForRegexp(r[0]);
          if (reLength.isNull())
          {
            // try from the opposite end
            unsigned indexE = r.getNumChildren() - 1;
-          reLength = SequencesRewriter::getFixedLengthForRegexp(r[indexE]);
+          reLength = RegExpEntail::getFixedLengthForRegexp(r[indexE]);
            if (!reLength.isNull())
            {
              indexRm = indexE;
@@ -1068,7 +1059,7 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
          break;
        }
        default: {
-        Assert(!isRegExpKind(k));
+        Assert(!utils::isRegExpKind(k));
          break;
        }
      }
@@ -1267,7 +1258,7 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes
          break;
        }
        default: {
-        Assert(!isRegExpKind(k));
+        Assert(!utils::isRegExpKind(k));
          break;
        }
      }
@@ -1744,7 +1735,7 @@ std::string RegExpOpr::mkString( Node r ) {
          std::stringstream ss;
          ss << r;
          retStr = ss.str();
-        Assert(!isRegExpKind(r.getKind()));
+        Assert(!utils::isRegExpKind(r.getKind()));
          break;
        }
      }
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h

index 7845b2e00d63b892f64eb01e70534afeb3f2109b..53d60cd40f70f28fbe4f790fa631ab0e2612d4c5 100644 (file)
--- a/src/theory/strings/regexp_operation.h
+++ b/src/theory/strings/regexp_operation.h
@@ -121,8 +121,6 @@ class RegExpOpr {
    bool checkConstRegExp( Node r );
    /** get the constant type for regular expression r */
    RegExpConstType getRegExpConstType(Node r);
-  /** is k a native operator whose return type is a regular expression? */
-  static bool isRegExpKind(Kind k);
    void simplify(Node t, std::vector< Node > &new_nodes, bool polarity);
    /**
     * This method returns 1 if the empty string is in r, 2 if the empty string
diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp

index f6ef92b4ded9cc3929820f092b20a7168e078279..35c52646d4ec66f4c362c5534e563ee9b2ae7038 100644 (file)
--- a/src/theory/strings/regexp_solver.cpp
+++ b/src/theory/strings/regexp_solver.cpp
@@ -686,7 +686,7 @@ Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
      {
        Trace("strings-error") << "Unsupported term: " << r
                               << " in normalization SymRegExp." << std::endl;
-      Assert(!RegExpOpr::isRegExpKind(r.getKind()));
+      Assert(!utils::isRegExpKind(r.getKind()));
      }
    }
    return ret;
diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp

index be1e1345920a3a659c743ca83e551f97cc2c3a28..9ccda55c2a62c75ffeef703cd01df30bed603149 100644 (file)
--- a/src/theory/strings/sequences_rewriter.cpp
+++ b/src/theory/strings/sequences_rewriter.cpp
@@ -16,294 +16,22 @@
  
  #include "theory/strings/sequences_rewriter.h"
  
-#include <stdint.h>
-#include <algorithm>
-
+#include "expr/attribute.h"
  #include "expr/node_builder.h"
-#include "options/strings_options.h"
-#include "smt/logic_exception.h"
-#include "theory/arith/arith_msum.h"
-#include "theory/strings/regexp_operation.h"
+#include "theory/rewriter.h"
+#include "theory/strings/arith_entail.h"
+#include "theory/strings/regexp_entail.h"
+#include "theory/strings/strings_entail.h"
  #include "theory/strings/strings_rewriter.h"
  #include "theory/strings/theory_strings_utils.h"
  #include "theory/strings/word.h"
-#include "theory/theory.h"
-#include "util/integer.h"
-#include "util/rational.h"
  
  using namespace std;
-using namespace CVC4;
  using namespace CVC4::kind;
-using namespace CVC4::theory;
-using namespace CVC4::theory::strings;
  
-Node SequencesRewriter::simpleRegexpConsume(std::vector<Node>& mchildren,
-                                            std::vector<Node>& children,
-                                            int dir)
-{
-  Trace("regexp-ext-rewrite-debug")
-      << "Simple reg exp consume, dir=" << dir << ":" << std::endl;
-  Trace("regexp-ext-rewrite-debug")
-      << "  mchildren : " << mchildren << std::endl;
-  Trace("regexp-ext-rewrite-debug") << "  children : " << children << std::endl;
-  NodeManager* nm = NodeManager::currentNM();
-  unsigned tmin = dir < 0 ? 0 : dir;
-  unsigned tmax = dir < 0 ? 1 : dir;
-  // try to remove off front and back
-  for (unsigned t = 0; t < 2; t++)
-  {
-    if (tmin <= t && t <= tmax)
-    {
-      bool do_next = true;
-      while (!children.empty() && !mchildren.empty() && do_next)
-      {
-        do_next = false;
-        Node xc = mchildren[mchildren.size() - 1];
-        Node rc = children[children.size() - 1];
-        Assert(rc.getKind() != kind::REGEXP_CONCAT);
-        Assert(xc.getKind() != kind::STRING_CONCAT);
-        if (rc.getKind() == kind::STRING_TO_REGEXP)
-        {
-          if (xc == rc[0])
-          {
-            children.pop_back();
-            mchildren.pop_back();
-            do_next = true;
-            Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl;
-          }
-          else if (xc.isConst() && rc[0].isConst())
-          {
-            // split the constant
-            int index;
-            Node s = splitConstant(xc, rc[0], index, t == 0);
-            Trace("regexp-ext-rewrite-debug")
-                << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> "
-                << s << " " << index << " " << t << std::endl;
-            if (s.isNull())
-            {
-              Trace("regexp-ext-rewrite-debug")
-                  << "...return false" << std::endl;
-              return NodeManager::currentNM()->mkConst(false);
-            }
-            else
-            {
-              Trace("regexp-ext-rewrite-debug")
-                  << "...strip equal const" << std::endl;
-              children.pop_back();
-              mchildren.pop_back();
-              if (index == 0)
-              {
-                mchildren.push_back(s);
-              }
-              else
-              {
-                children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
-              }
-            }
-            do_next = true;
-          }
-        }
-        else if (xc.isConst())
-        {
-          // check for constants
-          CVC4::String s = xc.getConst<String>();
-          if (Word::isEmpty(xc))
-          {
-            Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl;
-            // ignore and continue
-            mchildren.pop_back();
-            do_next = true;
-          }
-          else if (rc.getKind() == kind::REGEXP_RANGE
-                   || rc.getKind() == kind::REGEXP_SIGMA)
-          {
-            std::vector<unsigned> ssVec;
-            ssVec.push_back(t == 0 ? s.back() : s.front());
-            CVC4::String ss(ssVec);
-            if (testConstStringInRegExp(ss, 0, rc))
-            {
-              // strip off one character
-              mchildren.pop_back();
-              if (s.size() > 1)
-              {
-                if (t == 0)
-                {
-                  mchildren.push_back(NodeManager::currentNM()->mkConst(
-                      s.substr(0, s.size() - 1)));
-                }
-                else
-                {
-                  mchildren.push_back(
-                      NodeManager::currentNM()->mkConst(s.substr(1)));
-                }
-              }
-              children.pop_back();
-              do_next = true;
-            }
-            else
-            {
-              return NodeManager::currentNM()->mkConst(false);
-            }
-          }
-          else if (rc.getKind() == kind::REGEXP_INTER
-                   || rc.getKind() == kind::REGEXP_UNION)
-          {
-            // see if any/each child does not work
-            bool result_valid = true;
-            Node result;
-            Node emp_s = NodeManager::currentNM()->mkConst(::CVC4::String(""));
-            for (unsigned i = 0; i < rc.getNumChildren(); i++)
-            {
-              std::vector<Node> mchildren_s;
-              std::vector<Node> children_s;
-              mchildren_s.push_back(xc);
-              utils::getConcat(rc[i], children_s);
-              Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
-              if (!ret.isNull())
-              {
-                // one conjunct cannot be satisfied, return false
-                if (rc.getKind() == kind::REGEXP_INTER)
-                {
-                  return ret;
-                }
-              }
-              else
-              {
-                if (children_s.empty())
-                {
-                  // if we were able to fully consume, store the result
-                  Assert(mchildren_s.size() <= 1);
-                  if (mchildren_s.empty())
-                  {
-                    mchildren_s.push_back(emp_s);
-                  }
-                  if (result.isNull())
-                  {
-                    result = mchildren_s[0];
-                  }
-                  else if (result != mchildren_s[0])
-                  {
-                    result_valid = false;
-                  }
-                }
-                else
-                {
-                  result_valid = false;
-                }
-              }
-            }
-            if (result_valid)
-            {
-              if (result.isNull())
-              {
-                // all disjuncts cannot be satisfied, return false
-                Assert(rc.getKind() == kind::REGEXP_UNION);
-                return NodeManager::currentNM()->mkConst(false);
-              }
-              else
-              {
-                // all branches led to the same result
-                children.pop_back();
-                mchildren.pop_back();
-                if (result != emp_s)
-                {
-                  mchildren.push_back(result);
-                }
-                do_next = true;
-              }
-            }
-          }
-          else if (rc.getKind() == kind::REGEXP_STAR)
-          {
-            // check if there is no way that this star can be unrolled even once
-            std::vector<Node> mchildren_s;
-            mchildren_s.insert(
-                mchildren_s.end(), mchildren.begin(), mchildren.end());
-            if (t == 1)
-            {
-              std::reverse(mchildren_s.begin(), mchildren_s.end());
-            }
-            std::vector<Node> children_s;
-            utils::getConcat(rc[0], children_s);
-            Trace("regexp-ext-rewrite-debug")
-                << "...recursive call on body of star" << std::endl;
-            Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
-            if (!ret.isNull())
-            {
-              Trace("regexp-ext-rewrite-debug")
-                  << "CRE : regexp star infeasable " << xc << " " << rc
-                  << std::endl;
-              children.pop_back();
-              if (!children.empty())
-              {
-                Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl;
-                do_next = true;
-              }
-            }
-            else
-            {
-              if (children_s.empty())
-              {
-                // check if beyond this, we can't do it or there is nothing
-                // left, if so, repeat
-                bool can_skip = false;
-                if (children.size() > 1)
-                {
-                  std::vector<Node> mchildren_ss;
-                  mchildren_ss.insert(
-                      mchildren_ss.end(), mchildren.begin(), mchildren.end());
-                  std::vector<Node> children_ss;
-                  children_ss.insert(
-                      children_ss.end(), children.begin(), children.end() - 1);
-                  if (t == 1)
-                  {
-                    std::reverse(mchildren_ss.begin(), mchildren_ss.end());
-                    std::reverse(children_ss.begin(), children_ss.end());
-                  }
-                  if (simpleRegexpConsume(mchildren_ss, children_ss, t)
-                          .isNull())
-                  {
-                    can_skip = true;
-                  }
-                }
-                if (!can_skip)
-                {
-                  Trace("regexp-ext-rewrite-debug")
-                      << "...can't skip" << std::endl;
-                  // take the result of fully consuming once
-                  if (t == 1)
-                  {
-                    std::reverse(mchildren_s.begin(), mchildren_s.end());
-                  }
-                  mchildren.clear();
-                  mchildren.insert(
-                      mchildren.end(), mchildren_s.begin(), mchildren_s.end());
-                  do_next = true;
-                }
-                else
-                {
-                  Trace("regexp-ext-rewrite-debug")
-                      << "...can skip " << rc << " from " << xc << std::endl;
-                }
-              }
-            }
-          }
-        }
-        if (!do_next)
-        {
-          Trace("regexp-ext-rewrite")
-              << "Cannot consume : " << xc << " " << rc << std::endl;
-        }
-      }
-    }
-    if (dir != 0)
-    {
-      std::reverse(children.begin(), children.end());
-      std::reverse(mchildren.begin(), mchildren.end());
-    }
-  }
-  return Node::null();
-}
+namespace CVC4 {
+namespace theory {
+namespace strings {
  
  Node SequencesRewriter::rewriteEquality(Node node)
  {
@@ -325,7 +53,7 @@ Node SequencesRewriter::rewriteEquality(Node node)
      // must call rewrite contains directly to avoid infinite loop
      // we do a fix point since we may rewrite contains terms to simpler
      // contains terms.
-    Node ctn = checkEntailContains(node[r], node[1 - r], false);
+    Node ctn = StringsEntail::checkContains(node[r], node[1 - r], false);
      if (!ctn.isNull())
      {
        if (!ctn.getConst<bool>())
@@ -474,7 +202,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
    // ------- homogeneous constants
    for (unsigned i = 0; i < 2; i++)
    {
-    Node cn = checkEntailHomogeneousString(node[i]);
+    Node cn = StringsEntail::checkHomogeneousString(node[i]);
      if (!cn.isNull() && !Word::isEmpty(cn))
      {
        Assert(cn.isConst());
@@ -564,7 +292,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
          }
  
          // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y "")))
-        if (checkEntailNonEmpty(ne[2]))
+        if (StringsEntail::checkNonEmpty(ne[2]))
          {
            Node ret =
                nm->mkNode(AND,
@@ -574,7 +302,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
          }
  
          // (= "" (str.replace x "A" "")) ---> (str.prefix x "A")
-        if (checkEntailLengthOne(ne[1]) && ne[2] == empty)
+        if (StringsEntail::checkLengthOne(ne[1]) && ne[2] == empty)
          {
            Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]);
            return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
@@ -584,7 +312,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
        {
          Node zero = nm->mkConst(Rational(0));
  
-        if (checkEntailArith(ne[1], false) && checkEntailArith(ne[2], true))
+        if (ArithEntail::check(ne[1], false) && ArithEntail::check(ne[2], true))
          {
            // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0
            if (ne[1] == zero)
@@ -600,7 +328,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
          }
  
          // (= "" (str.substr "A" 0 z)) ---> (<= z 0)
-        if (checkEntailNonEmpty(ne[0]) && ne[1] == zero)
+        if (StringsEntail::checkNonEmpty(ne[0]) && ne[1] == zero)
          {
            Node ret = nm->mkNode(LEQ, ne[2], zero);
            return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_Z);
@@ -618,7 +346,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
        Node x = node[1 - i];
  
        // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x))
-      if (checkEntailNonEmpty(x) && repl[0] == empty)
+      if (StringsEntail::checkNonEmpty(x) && repl[0] == empty)
        {
          Node ret = nm->mkNode(
              EQUAL, empty, nm->mkNode(STRING_STRREPL, x, repl[2], repl[1]));
@@ -649,7 +377,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
        {
          Node lenY = nm->mkNode(STRING_LENGTH, repl[1]);
          Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]);
-        if (checkEntailArithEq(lenY, lenZ))
+        if (ArithEntail::checkEq(lenY, lenZ))
          {
            Node ret = nm->mkNode(OR,
                                  nm->mkNode(EQUAL, repl[0], repl[1]),
@@ -672,7 +400,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
    {
      if (node[1 - i].getKind() == STRING_CONCAT)
      {
-      new_ret = inferEqsFromContains(node[i], node[1 - i]);
+      new_ret = StringsEntail::inferEqsFromContains(node[i], node[1 - i]);
        if (!new_ret.isNull())
        {
          return returnRewrite(node, new_ret, Rewrite::STR_EQ_CONJ_LEN_ENTAIL);
@@ -709,7 +437,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
            Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0);
            Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1);
  
-          if (checkEntailArithEq(lenPfx0, lenPfx1))
+          if (ArithEntail::checkEq(lenPfx0, lenPfx1))
            {
              std::vector<Node> sfxv0(v0.begin() + i, v0.end());
              std::vector<Node> sfxv1(v1.begin() + j, v1.end());
@@ -719,7 +447,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
                                        .eqNode(utils::mkConcat(sfxv1, stype)));
              return returnRewrite(node, ret, Rewrite::SPLIT_EQ);
            }
-          else if (checkEntailArith(lenPfx1, lenPfx0, true))
+          else if (ArithEntail::check(lenPfx1, lenPfx0, true))
            {
              // The prefix on the right-hand side is strictly longer than the
              // prefix on the left-hand side, so we try to strip the right-hand
@@ -729,7 +457,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
              // (= (str.++ "A" x y) (str.++ x "AB" z)) --->
              //   (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z)))
              std::vector<Node> rpfxv1;
-            if (stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0))
+            if (StringsEntail::stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0))
              {
                std::vector<Node> sfxv0(v0.begin() + i, v0.end());
                pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end());
@@ -746,7 +474,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
              // in the inner loop)
              break;
            }
-          else if (checkEntailArith(lenPfx0, lenPfx1, true))
+          else if (ArithEntail::check(lenPfx0, lenPfx1, true))
            {
              // The prefix on the left-hand side is strictly longer than the
              // prefix on the right-hand side, so we try to strip the left-hand
@@ -756,7 +484,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
              // (= (str.++ x "AB" z) (str.++ "A" x y)) --->
              //   (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y))
              std::vector<Node> rpfxv0;
-            if (stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1))
+            if (StringsEntail::stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1))
              {
                pfxv0.insert(pfxv0.end(), v0.begin() + i, v0.end());
                std::vector<Node> sfxv1(v1.begin() + j, v1.end());
@@ -927,7 +655,7 @@ Node SequencesRewriter::rewriteConcat(Node node)
    Node lastX;
    for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++)
    {
-    Node s = getStringOrEmpty(node_vec[i]);
+    Node s = StringsEntail::getStringOrEmpty(node_vec[i]);
      bool nextX = false;
      if (s != lastX)
      {
@@ -1058,7 +786,8 @@ Node SequencesRewriter::rewriteConcatRegExp(TNode node)
      {
        // if empty, drop it
        // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)*
-      if (isConstRegExp(curr) && testConstStringInRegExp(emptyStr, 0, curr))
+      if (RegExpEntail::isConstRegExp(curr)
+          && RegExpEntail::testConstStringInRegExp(emptyStr, 0, curr))
        {
          curr = Node::null();
        }
@@ -1079,8 +808,9 @@ Node SequencesRewriter::rewriteConcatRegExp(TNode node)
            lastAllStar = true;
            // go back and remove empty ones from back of cvec
            // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)*
-          while (!cvec.empty() && isConstRegExp(cvec.back())
-                 && testConstStringInRegExp(emptyStr, 0, cvec.back()))
+          while (!cvec.empty() && RegExpEntail::isConstRegExp(cvec.back())
+                 && RegExpEntail::testConstStringInRegExp(
+                        emptyStr, 0, cvec.back()))
            {
              cvec.pop_back();
            }
@@ -1145,7 +875,7 @@ Node SequencesRewriter::rewriteStarRegExp(TNode node)
    else if (node[0].getKind() == REGEXP_UNION)
    {
      // simplification of unions under star
-    if (hasEpsilonNode(node[0]))
+    if (RegExpEntail::hasEpsilonNode(node[0]))
      {
        bool changed = false;
        std::vector<Node> node_vec;
@@ -1348,270 +1078,6 @@ Node SequencesRewriter::rewriteRangeRegExp(TNode node)
    return node;
  }
  
-bool SequencesRewriter::isConstRegExp(TNode t)
-{
-  if (t.getKind() == kind::STRING_TO_REGEXP)
-  {
-    return t[0].isConst();
-  }
-  else if (t.isVar())
-  {
-    return false;
-  }
-  else
-  {
-    for (unsigned i = 0; i < t.getNumChildren(); ++i)
-    {
-      if (!isConstRegExp(t[i]))
-      {
-        return false;
-      }
-    }
-    return true;
-  }
-}
-
-bool SequencesRewriter::testConstStringInRegExp(CVC4::String& s,
-                                                unsigned int index_start,
-                                                TNode r)
-{
-  Assert(index_start <= s.size());
-  Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at "
-                        << index_start << std::endl;
-  Assert(!r.isVar());
-  Kind k = r.getKind();
-  switch (k)
-  {
-    case kind::STRING_TO_REGEXP:
-    {
-      CVC4::String s2 = s.substr(index_start, s.size() - index_start);
-      if (r[0].isConst())
-      {
-        return (s2 == r[0].getConst<String>());
-      }
-      else
-      {
-        Assert(false) << "RegExp contains variables";
-        return false;
-      }
-    }
-    case kind::REGEXP_CONCAT:
-    {
-      if (s.size() != index_start)
-      {
-        std::vector<int> vec_k(r.getNumChildren(), -1);
-        int start = 0;
-        int left = (int)s.size() - index_start;
-        int i = 0;
-        while (i < (int)r.getNumChildren())
-        {
-          bool flag = true;
-          if (i == (int)r.getNumChildren() - 1)
-          {
-            if (testConstStringInRegExp(s, index_start + start, r[i]))
-            {
-              return true;
-            }
-          }
-          else if (i == -1)
-          {
-            return false;
-          }
-          else
-          {
-            for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i])
-            {
-              CVC4::String t = s.substr(index_start + start, vec_k[i]);
-              if (testConstStringInRegExp(t, 0, r[i]))
-              {
-                start += vec_k[i];
-                left -= vec_k[i];
-                flag = false;
-                ++i;
-                vec_k[i] = -1;
-                break;
-              }
-            }
-          }
-
-          if (flag)
-          {
-            --i;
-            if (i >= 0)
-            {
-              start -= vec_k[i];
-              left += vec_k[i];
-            }
-          }
-        }
-        return false;
-      }
-      else
-      {
-        for (unsigned i = 0; i < r.getNumChildren(); ++i)
-        {
-          if (!testConstStringInRegExp(s, index_start, r[i])) return false;
-        }
-        return true;
-      }
-    }
-    case kind::REGEXP_UNION:
-    {
-      for (unsigned i = 0; i < r.getNumChildren(); ++i)
-      {
-        if (testConstStringInRegExp(s, index_start, r[i])) return true;
-      }
-      return false;
-    }
-    case kind::REGEXP_INTER:
-    {
-      for (unsigned i = 0; i < r.getNumChildren(); ++i)
-      {
-        if (!testConstStringInRegExp(s, index_start, r[i])) return false;
-      }
-      return true;
-    }
-    case kind::REGEXP_STAR:
-    {
-      if (s.size() != index_start)
-      {
-        for (unsigned i = s.size() - index_start; i > 0; --i)
-        {
-          CVC4::String t = s.substr(index_start, i);
-          if (testConstStringInRegExp(t, 0, r[0]))
-          {
-            if (index_start + i == s.size()
-                || testConstStringInRegExp(s, index_start + i, r))
-            {
-              return true;
-            }
-          }
-        }
-        return false;
-      }
-      else
-      {
-        return true;
-      }
-    }
-    case kind::REGEXP_EMPTY: { return false;
-    }
-    case kind::REGEXP_SIGMA:
-    {
-      if (s.size() == index_start + 1)
-      {
-        return true;
-      }
-      else
-      {
-        return false;
-      }
-    }
-    case kind::REGEXP_RANGE:
-    {
-      if (s.size() == index_start + 1)
-      {
-        unsigned a = r[0].getConst<String>().front();
-        unsigned b = r[1].getConst<String>().front();
-        unsigned c = s.back();
-        return (a <= c && c <= b);
-      }
-      else
-      {
-        return false;
-      }
-    }
-    case kind::REGEXP_LOOP:
-    {
-      uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
-      if (s.size() == index_start)
-      {
-        return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]);
-      }
-      else if (l == 0 && r[1] == r[2])
-      {
-        return false;
-      }
-      else
-      {
-        Assert(r.getNumChildren() == 3)
-            << "String rewriter error: LOOP has 2 children";
-        if (l == 0)
-        {
-          // R{0,u}
-          uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
-          for (unsigned len = s.size() - index_start; len >= 1; len--)
-          {
-            CVC4::String t = s.substr(index_start, len);
-            if (testConstStringInRegExp(t, 0, r[0]))
-            {
-              if (len + index_start == s.size())
-              {
-                return true;
-              }
-              else
-              {
-                Node num2 =
-                    NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1));
-                Node r2 = NodeManager::currentNM()->mkNode(
-                    kind::REGEXP_LOOP, r[0], r[1], num2);
-                if (testConstStringInRegExp(s, index_start + len, r2))
-                {
-                  return true;
-                }
-              }
-            }
-          }
-          return false;
-        }
-        else
-        {
-          // R{l,l}
-          Assert(r[1] == r[2])
-              << "String rewriter error: LOOP nums are not equal";
-          if (l > s.size() - index_start)
-          {
-            if (testConstStringInRegExp(s, s.size(), r[0]))
-            {
-              l = s.size() - index_start;
-            }
-            else
-            {
-              return false;
-            }
-          }
-          for (unsigned len = 1; len <= s.size() - index_start; len++)
-          {
-            CVC4::String t = s.substr(index_start, len);
-            if (testConstStringInRegExp(t, 0, r[0]))
-            {
-              Node num2 =
-                  NodeManager::currentNM()->mkConst(CVC4::Rational(l - 1));
-              Node r2 = NodeManager::currentNM()->mkNode(
-                  kind::REGEXP_LOOP, r[0], num2, num2);
-              if (testConstStringInRegExp(s, index_start + len, r2))
-              {
-                return true;
-              }
-            }
-          }
-          return false;
-        }
-      }
-    }
-    case REGEXP_COMPLEMENT:
-    {
-      return !testConstStringInRegExp(s, index_start, r[0]);
-      break;
-    }
-    default:
-    {
-      Assert(!RegExpOpr::isRegExpKind(k));
-      return false;
-    }
-  }
-}
-
  Node SequencesRewriter::rewriteMembership(TNode node)
  {
    NodeManager* nm = NodeManager::currentNM();
@@ -1626,12 +1092,12 @@ Node SequencesRewriter::rewriteMembership(TNode node)
      Node retNode = NodeManager::currentNM()->mkConst(false);
      return returnRewrite(node, retNode, Rewrite::RE_IN_EMPTY);
    }
-  else if (x.isConst() && isConstRegExp(r))
+  else if (x.isConst() && RegExpEntail::isConstRegExp(r))
    {
      // test whether x in node[1]
      CVC4::String s = x.getConst<String>();
-    Node retNode =
-        NodeManager::currentNM()->mkConst(testConstStringInRegExp(s, 0, r));
+    bool test = RegExpEntail::testConstStringInRegExp(s, 0, r);
+    Node retNode = NodeManager::currentNM()->mkConst(test);
      return returnRewrite(node, retNode, Rewrite::RE_IN_EVAL);
    }
    else if (r.getKind() == kind::REGEXP_SIGMA)
@@ -1666,7 +1132,7 @@ Node SequencesRewriter::rewriteMembership(TNode node)
        // (str.in.re (str.++ x1 ... xn) (re.* R)) -->
        //   (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R))
        //     if the length of all strings in R is one.
-      Node flr = getFixedLengthForRegexp(r[0]);
+      Node flr = RegExpEntail::getFixedLengthForRegexp(r[0]);
        if (!flr.isNull())
        {
          Node one = nm->mkConst(Rational(1));
@@ -1794,7 +1260,7 @@ Node SequencesRewriter::rewriteMembership(TNode node)
          success = false;
          std::vector<Node> children;
          utils::getConcat(r[0], children);
-        Node scn = simpleRegexpConsume(mchildren, children, dir);
+        Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children, dir);
          if (!scn.isNull())
          {
            Trace("regexp-ext-rewrite")
@@ -1834,7 +1300,7 @@ Node SequencesRewriter::rewriteMembership(TNode node)
      std::vector<Node> mchildren;
      utils::getConcat(x, mchildren);
      unsigned prevSize = children.size() + mchildren.size();
-    Node scn = simpleRegexpConsume(mchildren, children);
+    Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children);
      if (!scn.isNull())
      {
        Trace("regexp-ext-rewrite")
@@ -2003,19 +1469,6 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node)
    return RewriteResponse(REWRITE_DONE, retNode);
  }
  
-bool SequencesRewriter::hasEpsilonNode(TNode node)
-{
-  for (unsigned int i = 0; i < node.getNumChildren(); i++)
-  {
-    if (node[i].getKind() == kind::STRING_TO_REGEXP && node[i][0].isConst()
-        && Word::isEmpty(node[i][0]))
-    {
-      return true;
-    }
-  }
-  return false;
-}
-
  RewriteResponse SequencesRewriter::preRewrite(TNode node)
  {
    return RewriteResponse(REWRITE_DONE, node);
@@ -2106,12 +1559,12 @@ Node SequencesRewriter::rewriteSubstr(Node node)
    Node zero = nm->mkConst(CVC4::Rational(0));
  
    // if entailed non-positive length or negative start point
-  if (checkEntailArith(zero, node[1], true))
+  if (ArithEntail::check(zero, node[1], true))
    {
      Node ret = Word::mkEmptyWord(node.getType());
      return returnRewrite(node, ret, Rewrite::SS_START_NEG);
    }
-  else if (checkEntailArith(zero, node[2]))
+  else if (ArithEntail::check(zero, node[2]))
    {
      Node ret = Word::mkEmptyWord(node.getType());
      return returnRewrite(node, ret, Rewrite::SS_LEN_NON_POS);
@@ -2137,7 +1590,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
      // over-approximation of the length of (str.substr x a a), which
      // then allows us to reason that the result of the whole term must
      // be empty.
-    if (checkEntailArith(node[1], node[0][2]))
+    if (ArithEntail::check(node[1], node[0][2]))
      {
        Node ret = Word::mkEmptyWord(node.getType());
        return returnRewrite(node, ret, Rewrite::SS_START_GEQ_LEN);
@@ -2150,8 +1603,8 @@ Node SequencesRewriter::rewriteSubstr(Node node)
      // if (str.len y) = 1 and (str.len z) = 1
      if (node[1] == zero)
      {
-      if (checkEntailLengthOne(node[0][1], true)
-          && checkEntailLengthOne(node[0][2], true))
+      if (StringsEntail::checkLengthOne(node[0][1], true)
+          && StringsEntail::checkLengthOne(node[0][2], true))
        {
          Node ret = nm->mkNode(
              kind::STRING_STRREPL,
@@ -2172,7 +1625,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
    {
      Node curr = node[2];
      std::vector<Node> childrenr;
-    if (stripSymbolicLength(n1, childrenr, 1, curr))
+    if (StringsEntail::stripSymbolicLength(n1, childrenr, 1, curr))
      {
        if (curr != zero && !n1.empty())
        {
@@ -2204,7 +1657,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
        Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2]));
        if (node[2] != tot_len)
        {
-        if (checkEntailArith(node[2], tot_len))
+        if (ArithEntail::check(node[2], tot_len))
          {
            // end point beyond end point of string, map to tot_len
            Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len);
@@ -2220,7 +1673,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
        // (str.substr s x y) --> "" if x < len(s) |= 0 >= y
        Node n1_lt_tot_len =
            Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len));
-      if (checkEntailArithWithAssumption(n1_lt_tot_len, zero, node[2], false))
+      if (ArithEntail::checkWithAssumption(n1_lt_tot_len, zero, node[2], false))
        {
          Node ret = Word::mkEmptyWord(node.getType());
          return returnRewrite(node, ret, Rewrite::SS_START_ENTAILS_ZERO_LEN);
@@ -2229,7 +1682,8 @@ Node SequencesRewriter::rewriteSubstr(Node node)
        // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s)
        Node non_zero_len =
            Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2]));
-      if (checkEntailArithWithAssumption(non_zero_len, node[1], tot_len, false))
+      if (ArithEntail::checkWithAssumption(
+              non_zero_len, node[1], tot_len, false))
        {
          Node ret = Word::mkEmptyWord(node.getType());
          return returnRewrite(node, ret, Rewrite::SS_NON_ZERO_LEN_ENTAILS_OOB);
@@ -2238,7 +1692,8 @@ Node SequencesRewriter::rewriteSubstr(Node node)
        // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s)
        Node geq_zero_start =
            Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero));
-      if (checkEntailArithWithAssumption(geq_zero_start, zero, tot_len, false))
+      if (ArithEntail::checkWithAssumption(
+              geq_zero_start, zero, tot_len, false))
        {
          Node ret = Word::mkEmptyWord(node.getType());
          return returnRewrite(
@@ -2246,7 +1701,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
        }
  
        // (str.substr s x x) ---> "" if (str.len s) <= 1
-      if (node[1] == node[2] && checkEntailLengthOne(node[0]))
+      if (node[1] == node[2] && StringsEntail::checkLengthOne(node[0]))
        {
          Node ret = Word::mkEmptyWord(node.getType());
          return returnRewrite(node, ret, Rewrite::SS_LEN_ONE_Z_Z);
@@ -2257,7 +1712,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
        // strip off components while quantity is entailed positive
        int dir = r == 0 ? 1 : -1;
        std::vector<Node> childrenr;
-      if (stripSymbolicLength(n1, childrenr, dir, curr))
+      if (StringsEntail::stripSymbolicLength(n1, childrenr, dir, curr))
        {
          if (r == 0)
          {
@@ -2281,7 +1736,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
    {
      Node start_inner = node[0][1];
      Node start_outer = node[1];
-    if (checkEntailArith(start_outer) && checkEntailArith(start_inner))
+    if (ArithEntail::check(start_outer) && ArithEntail::check(start_inner))
      {
        // both are positive
        // thus, start point is definitely start_inner+start_outer.
@@ -2298,11 +1753,11 @@ Node SequencesRewriter::rewriteSubstr(Node node)
        {
          new_len = len_from_inner;
        }
-      else if (checkEntailArith(len_from_inner, len_from_outer))
+      else if (ArithEntail::check(len_from_inner, len_from_outer))
        {
          new_len = len_from_outer;
        }
-      else if (checkEntailArith(len_from_outer, len_from_inner))
+      else if (ArithEntail::check(len_from_outer, len_from_inner))
        {
          new_len = len_from_inner;
        }
@@ -2343,7 +1798,7 @@ Node SequencesRewriter::rewriteContains(Node node)
        {
          Node len1 =
              NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
-        if (checkEntailArith(len1, true))
+        if (ArithEntail::check(len1, true))
          {
            // we handle the false case here since the rewrite for equality
            // uses this function, hence we want to conclude false if possible.
@@ -2352,7 +1807,7 @@ Node SequencesRewriter::rewriteContains(Node node)
            return returnRewrite(node, ret, Rewrite::CTN_LHS_EMPTYSTR);
          }
        }
-      else if (checkEntailLengthOne(t))
+      else if (StringsEntail::checkLengthOne(t))
        {
          std::vector<Node> vec = Word::getChars(node[0]);
          Node emp = Word::mkEmptyWord(t.getType());
@@ -2372,7 +1827,8 @@ Node SequencesRewriter::rewriteContains(Node node)
        else if (node[1].getKind() == kind::STRING_CONCAT)
        {
          int firstc, lastc;
-        if (!canConstantContainConcat(node[0], node[1], firstc, lastc))
+        if (!StringsEntail::canConstantContainConcat(
+                node[0], node[1], firstc, lastc))
          {
            Node ret = NodeManager::currentNM()->mkConst(false);
            return returnRewrite(node, ret, Rewrite::CTN_NCONST_CTN_CONCAT);
@@ -2410,7 +1866,7 @@ Node SequencesRewriter::rewriteContains(Node node)
        }
        else if (node[0].getKind() == STRING_STRREPL)
        {
-        Node rplDomain = checkEntailContains(node[0][1], node[1]);
+        Node rplDomain = StringsEntail::checkContains(node[0][1], node[1]);
          if (!rplDomain.isNull() && !rplDomain.getConst<bool>())
          {
            Node d1 = nm->mkNode(STRING_STRCTN, node[0][0], node[1]);
@@ -2436,7 +1892,7 @@ Node SequencesRewriter::rewriteContains(Node node)
    // component-wise containment
    std::vector<Node> nc1rb;
    std::vector<Node> nc1re;
-  if (componentContains(nc1, nc2, nc1rb, nc1re) != -1)
+  if (StringsEntail::componentContains(nc1, nc2, nc1rb, nc1re) != -1)
    {
      Node ret = NodeManager::currentNM()->mkConst(true);
      return returnRewrite(node, ret, Rewrite::CTN_COMPONENT);
@@ -2446,7 +1902,7 @@ Node SequencesRewriter::rewriteContains(Node node)
    // strip endpoints
    std::vector<Node> nb;
    std::vector<Node> ne;
-  if (stripConstantEndpoints(nc1, nc2, nb, ne))
+  if (StringsEntail::stripConstantEndpoints(nc1, nc2, nb, ne))
    {
      Node ret = NodeManager::currentNM()->mkNode(
          kind::STRING_STRCTN, utils::mkConcat(nc1, stype), node[1]);
@@ -2464,10 +1920,10 @@ Node SequencesRewriter::rewriteContains(Node node)
        // replacement does not change y. (str.contains x w) checks that if the
        // replacement changes anything in y, the w makes it impossible for it to
        // occur in x.
-      Node ctnConst = checkEntailContains(node[0], n[0]);
+      Node ctnConst = StringsEntail::checkContains(node[0], n[0]);
        if (!ctnConst.isNull() && !ctnConst.getConst<bool>())
        {
-        Node ctnConst2 = checkEntailContains(node[0], n[2]);
+        Node ctnConst2 = StringsEntail::checkContains(node[0], n[2]);
          if (!ctnConst2.isNull() && !ctnConst2.getConst<bool>())
          {
            Node res = nm->mkConst(false);
@@ -2511,7 +1967,7 @@ Node SequencesRewriter::rewriteContains(Node node)
    // length entailment
    Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
    Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
-  if (checkEntailArith(len_n2, len_n1, true))
+  if (ArithEntail::check(len_n2, len_n1, true))
    {
      // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false
      Node ret = NodeManager::currentNM()->mkConst(false);
@@ -2522,13 +1978,13 @@ Node SequencesRewriter::rewriteContains(Node node)
    //   For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false
    //   since the number of a's in the second argument is greater than the number
    //   of a's in the first argument
-  if (checkEntailMultisetSubset(node[0], node[1]))
+  if (StringsEntail::checkMultisetSubset(node[0], node[1]))
    {
      Node ret = nm->mkConst(false);
      return returnRewrite(node, ret, Rewrite::CTN_MSET_NSS);
    }
  
-  if (checkEntailArith(len_n2, len_n1, false))
+  if (ArithEntail::check(len_n2, len_n1, false))
    {
      // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2
      Node ret = node[0].eqNode(node[1]);
@@ -2613,7 +2069,7 @@ Node SequencesRewriter::rewriteContains(Node node)
  
        // (str.contains (str.replace x y x) z) ---> (str.contains x z)
        // if (str.len z) <= 1
-      if (checkEntailLengthOne(node[1]))
+      if (StringsEntail::checkLengthOne(node[1]))
        {
          Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]);
          return returnRewrite(node, ret, Rewrite::CTN_REPL_LEN_ONE_TO_CTN);
@@ -2633,9 +2089,9 @@ Node SequencesRewriter::rewriteContains(Node node)
      // (str.contains (str.replace x y z) w) --->
      //   (str.contains (str.replace x y "") w)
      // if (str.contains z w) ---> false and (str.len w) = 1
-    if (checkEntailLengthOne(node[1]))
+    if (StringsEntail::checkLengthOne(node[1]))
      {
-      Node ctn = checkEntailContains(node[1], node[0][2]);
+      Node ctn = StringsEntail::checkContains(node[1], node[0][2]);
        if (!ctn.isNull() && !ctn.getConst<bool>())
        {
          Node empty = nm->mkConst(String(""));
@@ -2734,7 +2190,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
          return returnRewrite(node, zero, Rewrite::IDOF_EQ_CST_START);
        }
      }
-    if (checkEntailArith(node[2], true))
+    if (ArithEntail::check(node[2], true))
      {
        // y>0  implies  indexof( x, x, y ) --> -1
        Node negone = nm->mkConst(Rational(-1));
@@ -2757,7 +2213,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
    {
      if (Word::isEmpty(node[1]))
      {
-      if (checkEntailArith(len0, node[2]) && checkEntailArith(node[2]))
+      if (ArithEntail::check(len0, node[2]) && ArithEntail::check(node[2]))
        {
          // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z
          return returnRewrite(node, node[2], Rewrite::IDOF_EMP_IDOF);
@@ -2765,7 +2221,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
      }
    }
  
-  if (checkEntailArith(len1, len0m2, true))
+  if (ArithEntail::check(len1, len0m2, true))
    {
      // len(x)-z < len(y)  implies  indexof( x, y, z ) ----> -1
      Node negone = nm->mkConst(Rational(-1));
@@ -2779,7 +2235,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
      fstr = Rewriter::rewrite(fstr);
    }
  
-  Node cmp_conr = checkEntailContains(fstr, node[1]);
+  Node cmp_conr = StringsEntail::checkContains(fstr, node[1]);
    Trace("strings-rewrite-debug") << "For " << node << ", check contains("
                                   << fstr << ", " << node[1] << ")" << std::endl;
    Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl;
@@ -2794,7 +2250,8 @@ Node SequencesRewriter::rewriteIndexof(Node node)
          // past the first position in node[0] that contains node[1], we can drop
          std::vector<Node> nb;
          std::vector<Node> ne;
-        int cc = componentContains(children0, children1, nb, ne, true, 1);
+        int cc = StringsEntail::componentContains(
+            children0, children1, nb, ne, true, 1);
          if (cc != -1 && !ne.empty())
          {
            // For example:
@@ -2805,7 +2262,8 @@ Node SequencesRewriter::rewriteIndexof(Node node)
          }
  
          // Strip components from the beginning that are guaranteed not to match
-        if (stripConstantEndpoints(children0, children1, nb, ne, 1))
+        if (StringsEntail::stripConstantEndpoints(
+                children0, children1, nb, ne, 1))
          {
            // str.indexof(str.++("AB", x, "C"), "C", 0) --->
            // 2 + str.indexof(str.++(x, "C"), "C", 0)
@@ -2823,7 +2281,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
        // strip symbolic length
        Node new_len = node[2];
        std::vector<Node> nr;
-      if (stripSymbolicLength(children0, nr, 1, new_len))
+      if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
        {
          // For example:
          // z>str.len( x1 ) and str.contains( x2, y )-->true
@@ -2849,7 +2307,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
    {
      Node new_len = node[2];
      std::vector<Node> nr;
-    if (stripSymbolicLength(children0, nr, 1, new_len))
+    if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
      {
        // Normalize the string before the start index.
        //
@@ -2875,7 +2333,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
    {
      std::vector<Node> cb;
      std::vector<Node> ce;
-    if (stripConstantEndpoints(children0, children1, cb, ce, -1))
+    if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce, -1))
      {
        Node ret = utils::mkConcat(children0, stype);
        ret = nm->mkNode(STRING_STRIDOF, ret, node[1], node[2]);
@@ -2950,14 +2408,14 @@ Node SequencesRewriter::rewriteReplace(Node node)
      // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x
      Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
      Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
-    if (checkEntailArith(l1, l0))
+    if (ArithEntail::check(l1, l0))
      {
        return returnRewrite(node, node[0], Rewrite::RPL_RPL_LEN_ID);
      }
  
      // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x)
      // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "")
-    if (checkEntailLengthOne(node[0]))
+    if (StringsEntail::checkLengthOne(node[0]))
      {
        Node empty = nm->mkConst(String(""));
        Node rn1 = Rewriter::rewrite(
@@ -2966,7 +2424,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
        {
          std::vector<Node> emptyNodes;
          bool allEmptyEqs;
-        std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(rn1);
+        std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(rn1);
  
          if (allEmptyEqs)
          {
@@ -2987,14 +2445,15 @@ Node SequencesRewriter::rewriteReplace(Node node)
    // check if contains definitely does (or does not) hold
    Node cmp_con = nm->mkNode(kind::STRING_STRCTN, node[0], node[1]);
    Node cmp_conr = Rewriter::rewrite(cmp_con);
-  if (!checkEntailContains(node[0], node[1]).isNull())
+  if (!StringsEntail::checkContains(node[0], node[1]).isNull())
    {
      if (cmp_conr.getConst<bool>())
      {
        // component-wise containment
        std::vector<Node> cb;
        std::vector<Node> ce;
-      int cc = componentContains(children0, children1, cb, ce, true, 1);
+      int cc = StringsEntail::componentContains(
+          children0, children1, cb, ce, true, 1);
        if (cc != -1)
        {
          if (cc == 0 && children0[0] == children1[0])
@@ -3052,7 +2511,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
  
      std::vector<Node> emptyNodes;
      bool allEmptyEqs;
-    std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(cmp_conr);
+    std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(cmp_conr);
  
      if (emptyNodes.size() > 0)
      {
@@ -3089,7 +2548,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
  
    if (cmp_conr != cmp_con)
    {
-    if (checkEntailNonEmpty(node[1]))
+    if (StringsEntail::checkNonEmpty(node[1]))
      {
        // pull endpoints that can be stripped
        // for example,
@@ -3097,7 +2556,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
        //   str.++( "b", str.replace( x, "a", y ), "b" )
        std::vector<Node> cb;
        std::vector<Node> ce;
-      if (stripConstantEndpoints(children0, children1, cb, ce))
+      if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce))
        {
          std::vector<Node> cc;
          cc.insert(cc.end(), cb.begin(), cb.end());
@@ -3138,7 +2597,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
      Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]);
      Node len0_1 = nm->mkNode(kind::PLUS, len0, one);
      // Check len(t) + j > len(x) + 1
-    if (checkEntailArith(maxLen1, len0_1, true))
+    if (ArithEntail::check(maxLen1, len0_1, true))
      {
        children1.push_back(nm->mkNode(
            kind::STRING_SUBSTR,
@@ -3188,7 +2647,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
        // (str.len w) >= (str.len z)
        Node wlen = nm->mkNode(kind::STRING_LENGTH, w);
        Node zlen = nm->mkNode(kind::STRING_LENGTH, z);
-      if (checkEntailArith(wlen, zlen))
+      if (ArithEntail::check(wlen, zlen))
        {
          // w != z
          Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z));
@@ -3214,7 +2673,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
          return returnRewrite(node, node[0], Rewrite::REPL_REPL2_INV_ID);
        }
        bool dualReplIteSuccess = false;
-      Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]);
+      Node cmp_con2 = StringsEntail::checkContains(node[1][0], node[1][2]);
        if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
        {
          // str.contains( x, z ) ---> false
@@ -3229,10 +2688,10 @@ Node SequencesRewriter::rewriteReplace(Node node)
          //   implies
          // str.replace( x, str.replace( x, y, z ), w ) --->
          // ite( str.contains( x, y ), x, w )
-        cmp_con2 = checkEntailContains(node[1][1], node[1][2]);
+        cmp_con2 = StringsEntail::checkContains(node[1][1], node[1][2]);
          if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
          {
-          cmp_con2 = checkEntailContains(node[1][2], node[1][1]);
+          cmp_con2 = StringsEntail::checkContains(node[1][2], node[1][1]);
            if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
            {
              dualReplIteSuccess = true;
@@ -3262,7 +2721,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
          // str.contains(y, z) ----> false and ( y == w or x == w ) implies
          //   implies
          // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w)
-        Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]);
+        Node cmp_con2 = StringsEntail::checkContains(node[1][0], node[1][2]);
          invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
        }
      }
@@ -3271,10 +2730,10 @@ Node SequencesRewriter::rewriteReplace(Node node)
        // str.contains(x, z) ----> false and str.contains(x, w) ----> false
        //   implies
        // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u)
-      Node cmp_con2 = checkEntailContains(node[0], node[1][1]);
+      Node cmp_con2 = StringsEntail::checkContains(node[0], node[1][1]);
        if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
        {
-        cmp_con2 = checkEntailContains(node[0], node[1][2]);
+        cmp_con2 = StringsEntail::checkContains(node[0], node[1][2]);
          invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
        }
      }
@@ -3290,7 +2749,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
      {
        // str.contains( z, w ) ----> false implies
        // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z )
-      Node cmp_con2 = checkEntailContains(node[1], node[2][0]);
+      Node cmp_con2 = StringsEntail::checkContains(node[1], node[2][0]);
        if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
        {
          Node res =
@@ -3310,7 +2769,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
        {
          // str.contains( x, z ) ----> false implies
          // str.replace( x, y, str.replace( y, z, w ) ) ---> x
-        cmp_con = checkEntailContains(node[0], node[2][1]);
+        cmp_con = StringsEntail::checkContains(node[0], node[2][1]);
          success = !cmp_con.isNull() && !cmp_con.getConst<bool>();
        }
        if (success)
@@ -3324,7 +2783,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
    //   str.replace( x ++ y ++ x ++ y, "A", z ) -->
    //   str.replace( x ++ y, "A", z ) ++ x ++ y
    // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y.
-  if (checkEntailLengthOne(node[1]))
+  if (StringsEntail::checkLengthOne(node[1]))
    {
      Node lastLhs;
      unsigned lastCheckIndex = 0;
@@ -3336,7 +2795,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
            checkLhs.end(), children0.begin(), children0.begin() + checkIndex);
        Node lhs = utils::mkConcat(checkLhs, stype);
        Node rhs = children0[checkIndex];
-      Node ctn = checkEntailContains(lhs, rhs);
+      Node ctn = StringsEntail::checkContains(lhs, rhs);
        if (!ctn.isNull() && ctn.getConst<bool>())
        {
          lastLhs = lhs;
@@ -3442,7 +2901,7 @@ Node SequencesRewriter::rewriteReplaceInternal(Node node)
    if (node[0] == node[1])
    {
      // only holds for replaceall if non-empty
-    if (nk == STRING_STRREPL || checkEntailNonEmpty(node[1]))
+    if (nk == STRING_STRREPL || StringsEntail::checkNonEmpty(node[1]))
      {
        return returnRewrite(node, node[2], Rewrite::RPL_REPLACE);
      }
@@ -3550,7 +3009,7 @@ Node SequencesRewriter::rewritePrefixSuffix(Node n)
  
    // Check if we can turn the prefix/suffix into equalities by showing that the
    // prefix/suffix is at least as long as the string
-  Node eqs = inferEqsFromContains(n[1], n[0]);
+  Node eqs = StringsEntail::inferEqsFromContains(n[1], n[0]);
    if (!eqs.isNull())
    {
      return returnRewrite(n, eqs, Rewrite::SUF_PREFIX_TO_EQS);
@@ -3563,2041 +3022,78 @@ Node SequencesRewriter::rewritePrefixSuffix(Node n)
    return returnRewrite(n, retNode, Rewrite::SUF_PREFIX_ELIM);
  }
  
-Node SequencesRewriter::splitConstant(Node a, Node b, int& index, bool isRev)
+Node SequencesRewriter::lengthPreserveRewrite(Node n)
  {
-  Assert(a.isConst() && b.isConst());
-  size_t lenA = Word::getLength(a);
-  size_t lenB = Word::getLength(b);
-  index = lenA <= lenB ? 1 : 0;
-  size_t len_short = index == 1 ? lenA : lenB;
-  bool cmp =
-      isRev ? a.getConst<String>().rstrncmp(b.getConst<String>(), len_short)
-            : a.getConst<String>().strncmp(b.getConst<String>(), len_short);
-  if (cmp)
-  {
-    Node l = index == 0 ? a : b;
-    if (isRev)
-    {
-      int new_len = l.getConst<String>().size() - len_short;
-      return Word::substr(l, 0, new_len);
-    }
-    else
-    {
-      return Word::substr(l, len_short);
-    }
-  }
-  // not the same prefix/suffix
-  return Node::null();
+  NodeManager* nm = NodeManager::currentNM();
+  Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
+  Node res = canonicalStrForSymbolicLength(len, n.getType());
+  return res.isNull() ? n : res;
  }
  
-bool SequencesRewriter::canConstantContainConcat(Node c,
-                                                 Node n,
-                                                 int& firstc,
-                                                 int& lastc)
+Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
  {
-  Assert(c.isConst());
-  CVC4::String t = c.getConst<String>();
-  const std::vector<unsigned>& tvec = t.getVec();
-  Assert(n.getKind() == kind::STRING_CONCAT);
-  // must find constant components in order
-  size_t pos = 0;
-  firstc = -1;
-  lastc = -1;
-  for (unsigned i = 0; i < n.getNumChildren(); i++)
-  {
-    if (n[i].isConst())
+  NodeManager* nm = NodeManager::currentNM();
+
+  Node res;
+  if (len.getKind() == CONST_RATIONAL)
+  {
+    // c -> "A" repeated c times
+    Rational ratLen = len.getConst<Rational>();
+    Assert(ratLen.getDenominator() == 1);
+    Integer intLen = ratLen.getNumerator();
+    uint32_t u = intLen.getUnsignedInt();
+    if (stype.isString())
      {
-      firstc = firstc == -1 ? i : firstc;
-      lastc = i;
-      CVC4::String s = n[i].getConst<String>();
-      size_t new_pos = t.find(s, pos);
-      if (new_pos == std::string::npos)
-      {
-        return false;
-      }
-      else
-      {
-        pos = new_pos + s.size();
-      }
+      res = nm->mkConst(String(std::string(u, 'A')));
      }
-    else if (n[i].getKind() == kind::STRING_ITOS && checkEntailArith(n[i][0]))
+    else
      {
-      // find the first occurrence of a digit starting at pos
-      while (pos < tvec.size() && !String::isDigit(tvec[pos]))
-      {
-        pos++;
-      }
-      if (pos == tvec.size())
-      {
-        return false;
-      }
-      // must consume at least one digit here
-      pos++;
+      Unimplemented() << "canonicalStrForSymbolicLength for non-string";
      }
    }
-  return true;
-}
-
-bool SequencesRewriter::canConstantContainList(Node c,
-                                               std::vector<Node>& l,
-                                               int& firstc,
-                                               int& lastc)
-{
-  Assert(c.isConst());
-  // must find constant components in order
-  size_t pos = 0;
-  firstc = -1;
-  lastc = -1;
-  for (unsigned i = 0; i < l.size(); i++)
-  {
-    if (l[i].isConst())
+  else if (len.getKind() == PLUS)
+  {
+    // x + y -> norm(x) + norm(y)
+    NodeBuilder<> concatBuilder(STRING_CONCAT);
+    for (const auto& n : len)
      {
-      firstc = firstc == -1 ? i : firstc;
-      lastc = i;
-      size_t new_pos = Word::find(c, l[i], pos);
-      if (new_pos == std::string::npos)
-      {
-        return false;
-      }
-      else
+      Node sn = canonicalStrForSymbolicLength(n, stype);
+      if (sn.isNull())
        {
-        pos = new_pos + Word::getLength(l[i]);
-      }
-    }
-  }
-  return true;
-}
-
-bool SequencesRewriter::stripSymbolicLength(std::vector<Node>& n1,
-                                            std::vector<Node>& nr,
-                                            int dir,
-                                            Node& curr)
-{
-  Assert(dir == 1 || dir == -1);
-  Assert(nr.empty());
-  Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
-  bool ret = false;
-  bool success;
-  unsigned sindex = 0;
-  do
-  {
-    Assert(!curr.isNull());
-    success = false;
-    if (curr != zero && sindex < n1.size())
-    {
-      unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
-      if (n1[sindex_use].isConst())
-      {
-        // could strip part of a constant
-        Node lowerBound = getConstantArithBound(Rewriter::rewrite(curr));
-        if (!lowerBound.isNull())
-        {
-          Assert(lowerBound.isConst());
-          Rational lbr = lowerBound.getConst<Rational>();
-          if (lbr.sgn() > 0)
-          {
-            Assert(checkEntailArith(curr, true));
-            CVC4::String s = n1[sindex_use].getConst<String>();
-            Node ncl =
-                NodeManager::currentNM()->mkConst(CVC4::Rational(s.size()));
-            Node next_s =
-                NodeManager::currentNM()->mkNode(kind::MINUS, lowerBound, ncl);
-            next_s = Rewriter::rewrite(next_s);
-            Assert(next_s.isConst());
-            // we can remove the entire constant
-            if (next_s.getConst<Rational>().sgn() >= 0)
-            {
-              curr = Rewriter::rewrite(
-                  NodeManager::currentNM()->mkNode(kind::MINUS, curr, ncl));
-              success = true;
-              sindex++;
-            }
-            else
-            {
-              // we can remove part of the constant
-              // lower bound minus the length of a concrete string is negative,
-              // hence lowerBound cannot be larger than long max
-              Assert(lbr < Rational(String::maxSize()));
-              curr = Rewriter::rewrite(NodeManager::currentNM()->mkNode(
-                  kind::MINUS, curr, lowerBound));
-              uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
-              Assert(lbsize < s.size());
-              if (dir == 1)
-              {
-                // strip partially from the front
-                nr.push_back(
-                    NodeManager::currentNM()->mkConst(s.prefix(lbsize)));
-                n1[sindex_use] = NodeManager::currentNM()->mkConst(
-                    s.suffix(s.size() - lbsize));
-              }
-              else
-              {
-                // strip partially from the back
-                nr.push_back(
-                    NodeManager::currentNM()->mkConst(s.suffix(lbsize)));
-                n1[sindex_use] = NodeManager::currentNM()->mkConst(
-                    s.prefix(s.size() - lbsize));
-              }
-              ret = true;
-            }
-            Assert(checkEntailArith(curr));
-          }
-          else
-          {
-            // we cannot remove the constant
-          }
-        }
-      }
-      else
-      {
-        Node next_s = NodeManager::currentNM()->mkNode(
-            kind::MINUS,
-            curr,
-            NodeManager::currentNM()->mkNode(kind::STRING_LENGTH,
-                                             n1[sindex_use]));
-        next_s = Rewriter::rewrite(next_s);
-        if (checkEntailArith(next_s))
-        {
-          success = true;
-          curr = next_s;
-          sindex++;
-        }
-      }
-    }
-  } while (success);
-  if (sindex > 0)
-  {
-    if (dir == 1)
-    {
-      nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
-      n1.erase(n1.begin(), n1.begin() + sindex);
-    }
-    else
-    {
-      nr.insert(nr.end(), n1.end() - sindex, n1.end());
-      n1.erase(n1.end() - sindex, n1.end());
-    }
-    ret = true;
-  }
-  return ret;
-}
-
-int SequencesRewriter::componentContains(std::vector<Node>& n1,
-                                         std::vector<Node>& n2,
-                                         std::vector<Node>& nb,
-                                         std::vector<Node>& ne,
-                                         bool computeRemainder,
-                                         int remainderDir)
-{
-  Assert(nb.empty());
-  Assert(ne.empty());
-  // if n2 is a singleton, we can do optimized version here
-  if (n2.size() == 1)
-  {
-    for (unsigned i = 0; i < n1.size(); i++)
-    {
-      Node n1rb;
-      Node n1re;
-      if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
-      {
-        if (computeRemainder)
-        {
-          n1[i] = n2[0];
-          if (remainderDir != -1)
-          {
-            if (!n1re.isNull())
-            {
-              ne.push_back(n1re);
-            }
-            ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
-            n1.erase(n1.begin() + i + 1, n1.end());
-          }
-          else if (!n1re.isNull())
-          {
-            n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode(
-                kind::STRING_CONCAT, n1[i], n1re));
-          }
-          if (remainderDir != 1)
-          {
-            nb.insert(nb.end(), n1.begin(), n1.begin() + i);
-            n1.erase(n1.begin(), n1.begin() + i);
-            if (!n1rb.isNull())
-            {
-              nb.push_back(n1rb);
-            }
-          }
-          else if (!n1rb.isNull())
-          {
-            n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode(
-                kind::STRING_CONCAT, n1rb, n1[i]));
-          }
-        }
-        return i;
-      }
-    }
-  }
-  else if (n1.size() >= n2.size())
-  {
-    unsigned diff = n1.size() - n2.size();
-    for (unsigned i = 0; i <= diff; i++)
-    {
-      Node n1rb_first;
-      Node n1re_first;
-      // first component of n2 must be a suffix
-      if (componentContainsBase(n1[i],
-                                n2[0],
-                                n1rb_first,
-                                n1re_first,
-                                1,
-                                computeRemainder && remainderDir != 1))
-      {
-        Assert(n1re_first.isNull());
-        for (unsigned j = 1; j < n2.size(); j++)
-        {
-          // are we in the last component?
-          if (j + 1 == n2.size())
-          {
-            Node n1rb_last;
-            Node n1re_last;
-            // last component of n2 must be a prefix
-            if (componentContainsBase(n1[i + j],
-                                      n2[j],
-                                      n1rb_last,
-                                      n1re_last,
-                                      -1,
-                                      computeRemainder && remainderDir != -1))
-            {
-              Assert(n1rb_last.isNull());
-              if (computeRemainder)
-              {
-                if (remainderDir != -1)
-                {
-                  if (!n1re_last.isNull())
-                  {
-                    ne.push_back(n1re_last);
-                  }
-                  ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
-                  n1.erase(n1.begin() + i + j + 1, n1.end());
-                  n1[i + j] = n2[j];
-                }
-                if (remainderDir != 1)
-                {
-                  n1[i] = n2[0];
-                  nb.insert(nb.end(), n1.begin(), n1.begin() + i);
-                  n1.erase(n1.begin(), n1.begin() + i);
-                  if (!n1rb_first.isNull())
-                  {
-                    nb.push_back(n1rb_first);
-                  }
-                }
-              }
-              return i;
-            }
-            else
-            {
-              break;
-            }
-          }
-          else if (n1[i + j] != n2[j])
-          {
-            break;
-          }
-        }
-      }
-    }
-  }
-  return -1;
-}
-
-bool SequencesRewriter::componentContainsBase(
-    Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
-{
-  Assert(n1rb.isNull());
-  Assert(n1re.isNull());
-
-  NodeManager* nm = NodeManager::currentNM();
-
-  if (n1 == n2)
-  {
-    return true;
-  }
-  else
-  {
-    if (n1.isConst() && n2.isConst())
-    {
-      size_t len1 = Word::getLength(n1);
-      size_t len2 = Word::getLength(n2);
-      if (len2 < len1)
-      {
-        if (dir == 1)
-        {
-          if (Word::suffix(n1, len2) == n2)
-          {
-            if (computeRemainder)
-            {
-              n1rb = Word::prefix(n1, len1 - len2);
-            }
-            return true;
-          }
-        }
-        else if (dir == -1)
-        {
-          if (Word::prefix(n1, len2) == n2)
-          {
-            if (computeRemainder)
-            {
-              n1re = Word::suffix(n1, len1 - len2);
-            }
-            return true;
-          }
-        }
-        else
-        {
-          size_t f = Word::find(n1, n2);
-          if (f != std::string::npos)
-          {
-            if (computeRemainder)
-            {
-              if (f > 0)
-              {
-                n1rb = Word::prefix(n1, f);
-              }
-              if (len1 > f + len2)
-              {
-                n1re = Word::suffix(n1, len1 - (f + len2));
-              }
-            }
-            return true;
-          }
-        }
-      }
-    }
-    else
-    {
-      // cases for:
-      //   n1 = x   containing   n2 = substr( x, n2[1], n2[2] )
-      if (n2.getKind() == kind::STRING_SUBSTR)
-      {
-        if (n2[0] == n1)
-        {
-          bool success = true;
-          Node start_pos = n2[1];
-          Node end_pos = nm->mkNode(kind::PLUS, n2[1], n2[2]);
-          Node len_n2s = nm->mkNode(kind::STRING_LENGTH, n2[0]);
-          if (dir == 1)
-          {
-            // To be a suffix, start + length must be greater than
-            // or equal to the length of the string.
-            success = checkEntailArith(end_pos, len_n2s);
-          }
-          else if (dir == -1)
-          {
-            // To be a prefix, must literally start at 0, since
-            //   if we knew it started at <0, it should be rewritten to "",
-            //   if we knew it started at 0, then n2[1] should be rewritten to
-            //   0.
-            success = start_pos.isConst()
-                      && start_pos.getConst<Rational>().sgn() == 0;
-          }
-          if (success)
-          {
-            if (computeRemainder)
-            {
-              // we can only compute the remainder if start_pos and end_pos
-              // are known to be non-negative.
-              if (!checkEntailArith(start_pos) || !checkEntailArith(end_pos))
-              {
-                return false;
-              }
-              if (dir != 1)
-              {
-                n1rb = nm->mkNode(kind::STRING_SUBSTR,
-                                  n2[0],
-                                  nm->mkConst(Rational(0)),
-                                  start_pos);
-              }
-              if (dir != -1)
-              {
-                n1re = nm->mkNode(kind::STRING_SUBSTR, n2[0], end_pos, len_n2s);
-              }
-            }
-            return true;
-          }
-        }
-      }
-
-      if (!computeRemainder && dir == 0)
-      {
-        if (n1.getKind() == STRING_STRREPL)
-        {
-          // (str.contains (str.replace x y z) w) ---> true
-          // if (str.contains x w) --> true and (str.contains z w) ---> true
-          Node xCtnW = checkEntailContains(n1[0], n2);
-          if (!xCtnW.isNull() && xCtnW.getConst<bool>())
-          {
-            Node zCtnW = checkEntailContains(n1[2], n2);
-            if (!zCtnW.isNull() && zCtnW.getConst<bool>())
-            {
-              return true;
-            }
-          }
-        }
-      }
-    }
-  }
-  return false;
-}
-
-bool SequencesRewriter::stripConstantEndpoints(std::vector<Node>& n1,
-                                               std::vector<Node>& n2,
-                                               std::vector<Node>& nb,
-                                               std::vector<Node>& ne,
-                                               int dir)
-{
-  Assert(nb.empty());
-  Assert(ne.empty());
-
-  NodeManager* nm = NodeManager::currentNM();
-  bool changed = false;
-  // for ( forwards, backwards ) direction
-  for (unsigned r = 0; r < 2; r++)
-  {
-    if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
-    {
-      unsigned index0 = r == 0 ? 0 : n1.size() - 1;
-      unsigned index1 = r == 0 ? 0 : n2.size() - 1;
-      bool removeComponent = false;
-      Node n1cmp = n1[index0];
-
-      if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0)
-      {
-        return false;
-      }
-
-      std::vector<Node> sss;
-      std::vector<Node> sls;
-      n1cmp = decomposeSubstrChain(n1cmp, sss, sls);
-      Trace("strings-rewrite-debug2")
-          << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
-          << ", dir = " << dir << std::endl;
-      if (n1cmp.isConst())
-      {
-        CVC4::String s = n1cmp.getConst<String>();
-        // overlap is an overapproximation of the number of characters
-        // n2[index1] can match in s
-        unsigned overlap = s.size();
-        if (n2[index1].isConst())
-        {
-          CVC4::String t = n2[index1].getConst<String>();
-          std::size_t ret = r == 0 ? s.find(t) : s.rfind(t);
-          if (ret == std::string::npos)
-          {
-            if (n1.size() == 1)
-            {
-              // can remove everything
-              //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
-              //   str.contains( "", str.++( "ba", x ) )
-              removeComponent = true;
-            }
-            else if (sss.empty())  // only if not substr
-            {
-              // check how much overlap there is
-              // This is used to partially strip off the endpoint
-              // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
-              // str.contains( str.++( "c", x ), str.++( "cd", y ) )
-              overlap = r == 0 ? s.overlap(t) : t.overlap(s);
-            }
-            else
-            {
-              // if we are looking at a substring, we can remove the component
-              // if there is no overlap
-              //   e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
-              //        --> str.contains( x, "a" )
-              removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
-            }
-          }
-          else if (sss.empty())  // only if not substr
-          {
-            Assert(ret < s.size());
-            // can strip off up to the find position, e.g.
-            // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
-            // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
-            // and
-            // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
-            // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
-            overlap = s.size() - ret;
-          }
-        }
-        else
-        {
-          // inconclusive
-        }
-        // process the overlap
-        if (overlap < s.size())
-        {
-          changed = true;
-          if (overlap == 0)
-          {
-            removeComponent = true;
-          }
-          else
-          {
-            // can drop the prefix (resp. suffix) from the first (resp. last)
-            // component
-            if (r == 0)
-            {
-              nb.push_back(nm->mkConst(s.prefix(s.size() - overlap)));
-              n1[index0] = nm->mkConst(s.suffix(overlap));
-            }
-            else
-            {
-              ne.push_back(nm->mkConst(s.suffix(s.size() - overlap)));
-              n1[index0] = nm->mkConst(s.prefix(overlap));
-            }
-          }
-        }
-      }
-      else if (n1cmp.getKind() == kind::STRING_ITOS)
-      {
-        if (n2[index1].isConst())
-        {
-          CVC4::String t = n2[index1].getConst<String>();
-
-          if (n1.size() == 1)
-          {
-            // if n1.size()==1, then if n2[index1] is not a number, we can drop
-            // the entire component
-            //    e.g. str.contains( int.to.str(x), "123a45") --> false
-            if (!t.isNumber())
-            {
-              removeComponent = true;
-            }
-          }
-          else
-          {
-            const std::vector<unsigned>& tvec = t.getVec();
-            Assert(tvec.size() > 0);
-
-            // if n1.size()>1, then if the first (resp. last) character of
-            // n2[index1]
-            //  is not a digit, we can drop the entire component, e.g.:
-            //    str.contains( str.++( int.to.str(x), y ), "a12") -->
-            //    str.contains( y, "a12" )
-            //    str.contains( str.++( y, int.to.str(x) ), "a0b") -->
-            //    str.contains( y, "a0b" )
-            unsigned i = r == 0 ? 0 : (tvec.size() - 1);
-            if (!String::isDigit(tvec[i]))
-            {
-              removeComponent = true;
-            }
-          }
-        }
-      }
-      if (removeComponent)
-      {
-        // can drop entire first (resp. last) component
-        if (r == 0)
-        {
-          nb.push_back(n1[index0]);
-          n1.erase(n1.begin(), n1.begin() + 1);
-        }
-        else
-        {
-          ne.push_back(n1[index0]);
-          n1.pop_back();
-        }
-        if (n1.empty())
-        {
-          // if we've removed everything, just return (we will rewrite to false)
-          return true;
-        }
-        else
-        {
-          changed = true;
-        }
-      }
-    }
-  }
-  // TODO (#1180) : computing the maximal overlap in this function may be
-  // important.
-  // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
-  // false
-  //   ...since str.to.int(x) can contain at most 1 character from "2aaaa",
-  //   leaving 4 characters
-  //      which is larger that the upper bound for length of str.substr(y,0,3),
-  //      which is 3.
-  return changed;
-}
-
-Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
-{
-  NodeManager* nm = NodeManager::currentNM();
-
-  Node res;
-  if (len.getKind() == kind::CONST_RATIONAL)
-  {
-    // c -> "A" repeated c times
-    Rational ratLen = len.getConst<Rational>();
-    Assert(ratLen.getDenominator() == 1);
-    Integer intLen = ratLen.getNumerator();
-    uint32_t u = intLen.getUnsignedInt();
-    if (stype.isString())
-    {
-      res = nm->mkConst(String(std::string(u, 'A')));
-    }
-    else
-    {
-      Unimplemented() << "canonicalStrForSymbolicLength for non-string";
-    }
-  }
-  else if (len.getKind() == kind::PLUS)
-  {
-    // x + y -> norm(x) + norm(y)
-    NodeBuilder<> concatBuilder(kind::STRING_CONCAT);
-    for (const auto& n : len)
-    {
-      Node sn = canonicalStrForSymbolicLength(n, stype);
-      if (sn.isNull())
-      {
-        return Node::null();
-      }
-      std::vector<Node> snChildren;
-      utils::getConcat(sn, snChildren);
-      concatBuilder.append(snChildren);
-    }
-    res = concatBuilder.constructNode();
-  }
-  else if (len.getKind() == kind::MULT && len.getNumChildren() == 2
-           && len[0].isConst())
-  {
-    // c * x -> norm(x) repeated c times
-    Rational ratReps = len[0].getConst<Rational>();
-    Assert(ratReps.getDenominator() == 1);
-    Integer intReps = ratReps.getNumerator();
-
-    Node nRep = canonicalStrForSymbolicLength(len[1], stype);
-    std::vector<Node> nRepChildren;
-    utils::getConcat(nRep, nRepChildren);
-    NodeBuilder<> concatBuilder(kind::STRING_CONCAT);
-    for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
-    {
-      concatBuilder.append(nRepChildren);
-    }
-    res = concatBuilder.constructNode();
-  }
-  else if (len.getKind() == kind::STRING_LENGTH)
-  {
-    // len(x) -> x
-    res = len[0];
-  }
-  return res;
-}
-
-Node SequencesRewriter::lengthPreserveRewrite(Node n)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
-  Node res = canonicalStrForSymbolicLength(len, n.getType());
-  return res.isNull() ? n : res;
-}
-
-Node SequencesRewriter::checkEntailContains(Node a, Node b, bool fullRewriter)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  Node ctn = nm->mkNode(kind::STRING_STRCTN, a, b);
-
-  if (fullRewriter)
-  {
-    ctn = Rewriter::rewrite(ctn);
-  }
-  else
-  {
-    Node prev;
-    do
-    {
-      prev = ctn;
-      ctn = rewriteContains(ctn);
-    } while (prev != ctn && ctn.getKind() == kind::STRING_STRCTN);
-  }
-
-  Assert(ctn.getType().isBoolean());
-  return ctn.isConst() ? ctn : Node::null();
-}
-
-bool SequencesRewriter::checkEntailNonEmpty(Node a)
-{
-  Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
-  len = Rewriter::rewrite(len);
-  return checkEntailArith(len, true);
-}
-
-bool SequencesRewriter::checkEntailLengthOne(Node s, bool strict)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  Node one = nm->mkConst(Rational(1));
-  Node len = nm->mkNode(STRING_LENGTH, s);
-  len = Rewriter::rewrite(len);
-  return checkEntailArith(one, len) && (!strict || checkEntailArith(len, true));
-}
-
-bool SequencesRewriter::checkEntailArithEq(Node a, Node b)
-{
-  if (a == b)
-  {
-    return true;
-  }
-  else
-  {
-    Node ar = Rewriter::rewrite(a);
-    Node br = Rewriter::rewrite(b);
-    return ar == br;
-  }
-}
-
-bool SequencesRewriter::checkEntailArith(Node a, Node b, bool strict)
-{
-  if (a == b)
-  {
-    return !strict;
-  }
-  else
-  {
-    Node diff = NodeManager::currentNM()->mkNode(kind::MINUS, a, b);
-    return checkEntailArith(diff, strict);
-  }
-}
-
-struct StrCheckEntailArithTag
-{
-};
-struct StrCheckEntailArithComputedTag
-{
-};
-/** Attribute true for expressions for which checkEntailArith returned true */
-typedef expr::Attribute<StrCheckEntailArithTag, bool> StrCheckEntailArithAttr;
-typedef expr::Attribute<StrCheckEntailArithComputedTag, bool>
-    StrCheckEntailArithComputedAttr;
-
-bool SequencesRewriter::checkEntailArith(Node a, bool strict)
-{
-  if (a.isConst())
-  {
-    return a.getConst<Rational>().sgn() >= (strict ? 1 : 0);
-  }
-
-  Node ar =
-      strict
-          ? NodeManager::currentNM()->mkNode(
-                kind::MINUS, a, NodeManager::currentNM()->mkConst(Rational(1)))
-          : a;
-  ar = Rewriter::rewrite(ar);
-
-  if (ar.getAttribute(StrCheckEntailArithComputedAttr()))
-  {
-    return ar.getAttribute(StrCheckEntailArithAttr());
-  }
-
-  bool ret = checkEntailArithInternal(ar);
-  if (!ret)
-  {
-    // try with approximations
-    ret = checkEntailArithApprox(ar);
-  }
-  // cache the result
-  ar.setAttribute(StrCheckEntailArithAttr(), ret);
-  ar.setAttribute(StrCheckEntailArithComputedAttr(), true);
-  return ret;
-}
-
-bool SequencesRewriter::checkEntailArithApprox(Node ar)
-{
-  Assert(Rewriter::rewrite(ar) == ar);
-  NodeManager* nm = NodeManager::currentNM();
-  std::map<Node, Node> msum;
-  Trace("strings-ent-approx-debug")
-      << "Setup arithmetic approximations for " << ar << std::endl;
-  if (!ArithMSum::getMonomialSum(ar, msum))
-  {
-    Trace("strings-ent-approx-debug")
-        << "...failed to get monomial sum!" << std::endl;
-    return false;
-  }
-  // for each monomial v*c, mApprox[v] a list of
-  // possibilities for how the term can be soundly approximated, that is,
-  // if mApprox[v] contains av, then v*c > av*c. Notice that if c
-  // is positive, then v > av, otherwise if c is negative, then v < av.
-  // In other words, av is an under-approximation if c is positive, and an
-  // over-approximation if c is negative.
-  bool changed = false;
-  std::map<Node, std::vector<Node> > mApprox;
-  // map from approximations to their monomial sums
-  std::map<Node, std::map<Node, Node> > approxMsums;
-  // aarSum stores each monomial that does not have multiple approximations
-  std::vector<Node> aarSum;
-  for (std::pair<const Node, Node>& m : msum)
-  {
-    Node v = m.first;
-    Node c = m.second;
-    Trace("strings-ent-approx-debug")
-        << "Get approximations " << v << "..." << std::endl;
-    if (v.isNull())
-    {
-      Node mn = c.isNull() ? nm->mkConst(Rational(1)) : c;
-      aarSum.push_back(mn);
-    }
-    else
-    {
-      // c.isNull() means c = 1
-      bool isOverApprox = !c.isNull() && c.getConst<Rational>().sgn() == -1;
-      std::vector<Node>& approx = mApprox[v];
-      std::unordered_set<Node, NodeHashFunction> visited;
-      std::vector<Node> toProcess;
-      toProcess.push_back(v);
-      do
-      {
-        Node curr = toProcess.back();
-        Trace("strings-ent-approx-debug") << "  process " << curr << std::endl;
-        curr = Rewriter::rewrite(curr);
-        toProcess.pop_back();
-        if (visited.find(curr) == visited.end())
-        {
-          visited.insert(curr);
-          std::vector<Node> currApprox;
-          getArithApproximations(curr, currApprox, isOverApprox);
-          if (currApprox.empty())
-          {
-            Trace("strings-ent-approx-debug")
-                << "...approximation: " << curr << std::endl;
-            // no approximations, thus curr is a possibility
-            approx.push_back(curr);
-          }
-          else
-          {
-            toProcess.insert(
-                toProcess.end(), currApprox.begin(), currApprox.end());
-          }
-        }
-      } while (!toProcess.empty());
-      Assert(!approx.empty());
-      // if we have only one approximation, move it to final
-      if (approx.size() == 1)
-      {
-        changed = v != approx[0];
-        Node mn = ArithMSum::mkCoeffTerm(c, approx[0]);
-        aarSum.push_back(mn);
-        mApprox.erase(v);
-      }
-      else
-      {
-        // compute monomial sum form for each approximation, used below
-        for (const Node& aa : approx)
-        {
-          if (approxMsums.find(aa) == approxMsums.end())
-          {
-            CVC4_UNUSED bool ret =
-                ArithMSum::getMonomialSum(aa, approxMsums[aa]);
-            Assert(ret);
-          }
-        }
-        changed = true;
-      }
-    }
-  }
-  if (!changed)
-  {
-    // approximations had no effect, return
-    Trace("strings-ent-approx-debug") << "...no approximations" << std::endl;
-    return false;
-  }
-  // get the current "fixed" sum for the abstraction of ar
-  Node aar = aarSum.empty()
-                 ? nm->mkConst(Rational(0))
-                 : (aarSum.size() == 1 ? aarSum[0] : nm->mkNode(PLUS, aarSum));
-  aar = Rewriter::rewrite(aar);
-  Trace("strings-ent-approx-debug")
-      << "...processed fixed sum " << aar << " with " << mApprox.size()
-      << " approximated monomials." << std::endl;
-  // if we have a choice of how to approximate
-  if (!mApprox.empty())
-  {
-    // convert aar back to monomial sum
-    std::map<Node, Node> msumAar;
-    if (!ArithMSum::getMonomialSum(aar, msumAar))
-    {
-      return false;
-    }
-    if (Trace.isOn("strings-ent-approx"))
-    {
-      Trace("strings-ent-approx")
-          << "---- Check arithmetic entailment by under-approximation " << ar
-          << " >= 0" << std::endl;
-      Trace("strings-ent-approx") << "FIXED:" << std::endl;
-      ArithMSum::debugPrintMonomialSum(msumAar, "strings-ent-approx");
-      Trace("strings-ent-approx") << "APPROX:" << std::endl;
-      for (std::pair<const Node, std::vector<Node> >& a : mApprox)
-      {
-        Node c = msum[a.first];
-        Trace("strings-ent-approx") << "  ";
-        if (!c.isNull())
-        {
-          Trace("strings-ent-approx") << c << " * ";
-        }
-        Trace("strings-ent-approx")
-            << a.second << " ...from " << a.first << std::endl;
-      }
-      Trace("strings-ent-approx") << std::endl;
-    }
-    Rational one(1);
-    // incorporate monomials one at a time that have a choice of approximations
-    while (!mApprox.empty())
-    {
-      Node v;
-      Node vapprox;
-      int maxScore = -1;
-      // Look at each approximation, take the one with the best score.
-      // Notice that we are in the process of trying to prove
-      // ( c1*t1 + .. + cn*tn ) + ( approx_1 | ... | approx_m ) >= 0,
-      // where c1*t1 + .. + cn*tn is the "fixed" component of our sum (aar)
-      // and approx_1 ... approx_m are possible approximations. The
-      // intution here is that we want coefficients c1...cn to be positive.
-      // This is because arithmetic string terms t1...tn (which may be
-      // applications of len, indexof, str.to.int) are never entailed to be
-      // negative. Hence, we add the approx_i that contributes the "most"
-      // towards making all constants c1...cn positive and cancelling negative
-      // monomials in approx_i itself.
-      for (std::pair<const Node, std::vector<Node> >& nam : mApprox)
-      {
-        Node cr = msum[nam.first];
-        for (const Node& aa : nam.second)
-        {
-          unsigned helpsCancelCount = 0;
-          unsigned addsObligationCount = 0;
-          std::map<Node, Node>::iterator it;
-          // we are processing an approximation cr*( c1*t1 + ... + cn*tn )
-          for (std::pair<const Node, Node>& aam : approxMsums[aa])
-          {
-            // Say aar is of the form t + c*ti, and aam is the monomial ci*ti
-            // where ci != 0. We say aam:
-            // (1) helps cancel if c != 0 and c>0 != ci>0
-            // (2) adds obligation if c>=0 and c+ci<0
-            Node ti = aam.first;
-            Node ci = aam.second;
-            if (!cr.isNull())
-            {
-              ci = ci.isNull() ? cr
-                               : Rewriter::rewrite(nm->mkNode(MULT, ci, cr));
-            }
-            Trace("strings-ent-approx-debug") << ci << "*" << ti << " ";
-            int ciSgn = ci.isNull() ? 1 : ci.getConst<Rational>().sgn();
-            it = msumAar.find(ti);
-            if (it != msumAar.end())
-            {
-              Node c = it->second;
-              int cSgn = c.isNull() ? 1 : c.getConst<Rational>().sgn();
-              if (cSgn == 0)
-              {
-                addsObligationCount += (ciSgn == -1 ? 1 : 0);
-              }
-              else if (cSgn != ciSgn)
-              {
-                helpsCancelCount++;
-                Rational r1 = c.isNull() ? one : c.getConst<Rational>();
-                Rational r2 = ci.isNull() ? one : ci.getConst<Rational>();
-                Rational r12 = r1 + r2;
-                if (r12.sgn() == -1)
-                {
-                  addsObligationCount++;
-                }
-              }
-            }
-            else
-            {
-              addsObligationCount += (ciSgn == -1 ? 1 : 0);
-            }
-          }
-          Trace("strings-ent-approx-debug")
-              << "counts=" << helpsCancelCount << "," << addsObligationCount
-              << " for " << aa << " into " << aar << std::endl;
-          int score = (addsObligationCount > 0 ? 0 : 2)
-                      + (helpsCancelCount > 0 ? 1 : 0);
-          // if its the best, update v and vapprox
-          if (v.isNull() || score > maxScore)
-          {
-            v = nam.first;
-            vapprox = aa;
-            maxScore = score;
-          }
-        }
-        if (!v.isNull())
-        {
-          break;
-        }
-      }
-      Trace("strings-ent-approx")
-          << "- Decide " << v << " = " << vapprox << std::endl;
-      // we incorporate v approximated by vapprox into the overall approximation
-      // for ar
-      Assert(!v.isNull() && !vapprox.isNull());
-      Assert(msum.find(v) != msum.end());
-      Node mn = ArithMSum::mkCoeffTerm(msum[v], vapprox);
-      aar = nm->mkNode(PLUS, aar, mn);
-      // update the msumAar map
-      aar = Rewriter::rewrite(aar);
-      msumAar.clear();
-      if (!ArithMSum::getMonomialSum(aar, msumAar))
-      {
-        Assert(false);
-        Trace("strings-ent-approx")
-            << "...failed to get monomial sum!" << std::endl;
-        return false;
-      }
-      // we have processed the approximation for v
-      mApprox.erase(v);
-    }
-    Trace("strings-ent-approx") << "-----------------" << std::endl;
-  }
-  if (aar == ar)
-  {
-    Trace("strings-ent-approx-debug")
-        << "...approximation had no effect" << std::endl;
-    // this should never happen, but we avoid the infinite loop for sanity here
-    Assert(false);
-    return false;
-  }
-  // Check entailment on the approximation of ar.
-  // Notice that this may trigger further reasoning by approximation. For
-  // example, len( replace( x ++ y, substr( x, 0, n ), z ) ) may be
-  // under-approximated as len( x ) + len( y ) - len( substr( x, 0, n ) ) on
-  // this call, where in the recursive call we may over-approximate
-  // len( substr( x, 0, n ) ) as len( x ). In this example, we can infer
-  // that len( replace( x ++ y, substr( x, 0, n ), z ) ) >= len( y ) in two
-  // steps.
-  if (checkEntailArith(aar))
-  {
-    Trace("strings-ent-approx")
-        << "*** StrArithApprox: showed " << ar
-        << " >= 0 using under-approximation!" << std::endl;
-    Trace("strings-ent-approx")
-        << "*** StrArithApprox: under-approximation was " << aar << std::endl;
-    return true;
-  }
-  return false;
-}
-
-void SequencesRewriter::getArithApproximations(Node a,
-                                               std::vector<Node>& approx,
-                                               bool isOverApprox)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  // We do not handle PLUS here since this leads to exponential behavior.
-  // Instead, this is managed, e.g. during checkEntailArithApprox, where
-  // PLUS terms are expanded "on-demand" during the reasoning.
-  Trace("strings-ent-approx-debug")
-      << "Get arith approximations " << a << std::endl;
-  Kind ak = a.getKind();
-  if (ak == MULT)
-  {
-    Node c;
-    Node v;
-    if (ArithMSum::getMonomial(a, c, v))
-    {
-      bool isNeg = c.getConst<Rational>().sgn() == -1;
-      getArithApproximations(v, approx, isNeg ? !isOverApprox : isOverApprox);
-      for (unsigned i = 0, size = approx.size(); i < size; i++)
-      {
-        approx[i] = nm->mkNode(MULT, c, approx[i]);
-      }
-    }
-  }
-  else if (ak == STRING_LENGTH)
-  {
-    Kind aak = a[0].getKind();
-    if (aak == STRING_SUBSTR)
-    {
-      // over,under-approximations for len( substr( x, n, m ) )
-      Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]);
-      if (isOverApprox)
-      {
-        // m >= 0 implies
-        //  m >= len( substr( x, n, m ) )
-        if (checkEntailArith(a[0][2]))
-        {
-          approx.push_back(a[0][2]);
-        }
-        if (checkEntailArith(lenx, a[0][1]))
-        {
-          // n <= len( x ) implies
-          //   len( x ) - n >= len( substr( x, n, m ) )
-          approx.push_back(nm->mkNode(MINUS, lenx, a[0][1]));
-        }
-        else
-        {
-          // len( x ) >= len( substr( x, n, m ) )
-          approx.push_back(lenx);
-        }
-      }
-      else
-      {
-        // 0 <= n and n+m <= len( x ) implies
-        //   m <= len( substr( x, n, m ) )
-        Node npm = nm->mkNode(PLUS, a[0][1], a[0][2]);
-        if (checkEntailArith(a[0][1]) && checkEntailArith(lenx, npm))
-        {
-          approx.push_back(a[0][2]);
-        }
-        // 0 <= n and n+m >= len( x ) implies
-        //   len(x)-n <= len( substr( x, n, m ) )
-        if (checkEntailArith(a[0][1]) && checkEntailArith(npm, lenx))
-        {
-          approx.push_back(nm->mkNode(MINUS, lenx, a[0][1]));
-        }
-      }
-    }
-    else if (aak == STRING_STRREPL)
-    {
-      // over,under-approximations for len( replace( x, y, z ) )
-      // notice this is either len( x ) or ( len( x ) + len( z ) - len( y ) )
-      Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]);
-      Node leny = nm->mkNode(STRING_LENGTH, a[0][1]);
-      Node lenz = nm->mkNode(STRING_LENGTH, a[0][2]);
-      if (isOverApprox)
-      {
-        if (checkEntailArith(leny, lenz))
-        {
-          // len( y ) >= len( z ) implies
-          //   len( x ) >= len( replace( x, y, z ) )
-          approx.push_back(lenx);
-        }
-        else
-        {
-          // len( x ) + len( z ) >= len( replace( x, y, z ) )
-          approx.push_back(nm->mkNode(PLUS, lenx, lenz));
-        }
-      }
-      else
-      {
-        if (checkEntailArith(lenz, leny) || checkEntailArith(lenz, lenx))
-        {
-          // len( y ) <= len( z ) or len( x ) <= len( z ) implies
-          //   len( x ) <= len( replace( x, y, z ) )
-          approx.push_back(lenx);
-        }
-        else
-        {
-          // len( x ) - len( y ) <= len( replace( x, y, z ) )
-          approx.push_back(nm->mkNode(MINUS, lenx, leny));
-        }
-      }
-    }
-    else if (aak == STRING_ITOS)
-    {
-      // over,under-approximations for len( int.to.str( x ) )
-      if (isOverApprox)
-      {
-        if (checkEntailArith(a[0][0], false))
-        {
-          if (checkEntailArith(a[0][0], true))
-          {
-            // x > 0 implies
-            //   x >= len( int.to.str( x ) )
-            approx.push_back(a[0][0]);
-          }
-          else
-          {
-            // x >= 0 implies
-            //   x+1 >= len( int.to.str( x ) )
-            approx.push_back(
-                nm->mkNode(PLUS, nm->mkConst(Rational(1)), a[0][0]));
-          }
-        }
-      }
-      else
-      {
-        if (checkEntailArith(a[0][0]))
-        {
-          // x >= 0 implies
-          //   len( int.to.str( x ) ) >= 1
-          approx.push_back(nm->mkConst(Rational(1)));
-        }
-        // other crazy things are possible here, e.g.
-        // len( int.to.str( len( y ) + 10 ) ) >= 2
-      }
-    }
-  }
-  else if (ak == STRING_STRIDOF)
-  {
-    // over,under-approximations for indexof( x, y, n )
-    if (isOverApprox)
-    {
-      Node lenx = nm->mkNode(STRING_LENGTH, a[0]);
-      Node leny = nm->mkNode(STRING_LENGTH, a[1]);
-      if (checkEntailArith(lenx, leny))
-      {
-        // len( x ) >= len( y ) implies
-        //   len( x ) - len( y ) >= indexof( x, y, n )
-        approx.push_back(nm->mkNode(MINUS, lenx, leny));
-      }
-      else
-      {
-        // len( x ) >= indexof( x, y, n )
-        approx.push_back(lenx);
-      }
-    }
-    else
-    {
-      // TODO?:
-      // contains( substr( x, n, len( x ) ), y ) implies
-      //   n <= indexof( x, y, n )
-      // ...hard to test, runs risk of non-termination
-
-      // -1 <= indexof( x, y, n )
-      approx.push_back(nm->mkConst(Rational(-1)));
-    }
-  }
-  else if (ak == STRING_STOI)
-  {
-    // over,under-approximations for str.to.int( x )
-    if (isOverApprox)
-    {
-      // TODO?:
-      // y >= 0 implies
-      //   y >= str.to.int( int.to.str( y ) )
-    }
-    else
-    {
-      // -1 <= str.to.int( x )
-      approx.push_back(nm->mkConst(Rational(-1)));
-    }
-  }
-  Trace("strings-ent-approx-debug") << "Return " << approx.size() << std::endl;
-}
-
-bool SequencesRewriter::checkEntailMultisetSubset(Node a, Node b)
-{
-  std::vector<Node> avec;
-  utils::getConcat(getMultisetApproximation(a), avec);
-  std::vector<Node> bvec;
-  utils::getConcat(b, bvec);
-
-  std::map<Node, unsigned> num_nconst[2];
-  std::map<Node, unsigned> num_const[2];
-  for (unsigned j = 0; j < 2; j++)
-  {
-    std::vector<Node>& jvec = j == 0 ? avec : bvec;
-    for (const Node& cc : jvec)
-    {
-      if (cc.isConst())
-      {
-        num_const[j][cc]++;
-      }
-      else
-      {
-        num_nconst[j][cc]++;
-      }
-    }
-  }
-  bool ms_success = true;
-  for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
-  {
-    if (nncp.second > num_nconst[1][nncp.first])
-    {
-      ms_success = false;
-      break;
-    }
-  }
-  if (ms_success)
-  {
-    // count the number of constant characters in the first argument
-    std::map<Node, unsigned> count_const[2];
-    std::vector<Node> chars;
-    for (unsigned j = 0; j < 2; j++)
-    {
-      for (std::pair<const Node, unsigned>& ncp : num_const[j])
-      {
-        Node cn = ncp.first;
-        Assert(cn.isConst());
-        std::vector<Node> cnChars = Word::getChars(cn);
-        for (const Node& ch : cnChars)
-        {
-          count_const[j][ch] += ncp.second;
-          if (std::find(chars.begin(), chars.end(), ch) == chars.end())
-          {
-            chars.push_back(ch);
-          }
-        }
-      }
-    }
-    Trace("strings-entail-ms-ss")
-        << "For " << a << " and " << b << " : " << std::endl;
-    for (const Node& ch : chars)
-    {
-      Trace("strings-entail-ms-ss") << "  # occurrences of substring ";
-      Trace("strings-entail-ms-ss") << ch << " in arguments is ";
-      Trace("strings-entail-ms-ss")
-          << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
-      if (count_const[0][ch] < count_const[1][ch])
-      {
-        return true;
-      }
-    }
-
-    // TODO (#1180): count the number of 2,3,4,.. character substrings
-    // for example:
-    // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
-    // since the second argument contains more occurrences of "bb".
-    // note this is orthogonal reasoning to inductive reasoning
-    // via regular membership reduction in Liang et al CAV 2015.
-  }
-  return false;
-}
-
-Node SequencesRewriter::checkEntailHomogeneousString(Node a)
-{
-  std::vector<Node> avec;
-  utils::getConcat(getMultisetApproximation(a), avec);
-
-  bool cValid = false;
-  Node c;
-  for (const Node& ac : avec)
-  {
-    if (ac.isConst())
-    {
-      std::vector<Node> acv = Word::getChars(ac);
-      for (const Node& cc : acv)
-      {
-        if (!cValid)
-        {
-          cValid = true;
-          c = cc;
-        }
-        else if (c != cc)
-        {
-          // Found a different character
-          return Node::null();
-        }
-      }
-    }
-    else
-    {
-      // Could produce a different character
-      return Node::null();
-    }
-  }
-
-  if (!cValid)
-  {
-    return Word::mkEmptyWord(a.getType());
-  }
-
-  return c;
-}
-
-Node SequencesRewriter::getMultisetApproximation(Node a)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  if (a.getKind() == STRING_SUBSTR)
-  {
-    return a[0];
-  }
-  else if (a.getKind() == STRING_STRREPL)
-  {
-    return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
-  }
-  else if (a.getKind() == STRING_CONCAT)
-  {
-    NodeBuilder<> nb(STRING_CONCAT);
-    for (const Node& ac : a)
-    {
-      nb << getMultisetApproximation(ac);
-    }
-    return nb.constructNode();
-  }
-  else
-  {
-    return a;
-  }
-}
-
-bool SequencesRewriter::checkEntailArithWithEqAssumption(Node assumption,
-                                                         Node a,
-                                                         bool strict)
-{
-  Assert(assumption.getKind() == kind::EQUAL);
-  Assert(Rewriter::rewrite(assumption) == assumption);
-
-  // Find candidates variables to compute substitutions for
-  std::unordered_set<Node, NodeHashFunction> candVars;
-  std::vector<Node> toVisit = {assumption};
-  while (!toVisit.empty())
-  {
-    Node curr = toVisit.back();
-    toVisit.pop_back();
-
-    if (curr.getKind() == kind::PLUS || curr.getKind() == kind::MULT
-        || curr.getKind() == kind::MINUS || curr.getKind() == kind::EQUAL)
-    {
-      for (const auto& currChild : curr)
-      {
-        toVisit.push_back(currChild);
-      }
-    }
-    else if (curr.isVar() && Theory::theoryOf(curr) == THEORY_ARITH)
-    {
-      candVars.insert(curr);
-    }
-    else if (curr.getKind() == kind::STRING_LENGTH)
-    {
-      candVars.insert(curr);
-    }
-  }
-
-  // Check if any of the candidate variables are in n
-  Node v;
-  Assert(toVisit.empty());
-  toVisit.push_back(a);
-  while (!toVisit.empty())
-  {
-    Node curr = toVisit.back();
-    toVisit.pop_back();
-
-    for (const auto& currChild : curr)
-    {
-      toVisit.push_back(currChild);
-    }
-
-    if (candVars.find(curr) != candVars.end())
-    {
-      v = curr;
-      break;
-    }
-  }
-
-  if (v.isNull())
-  {
-    // No suitable candidate found
-    return false;
-  }
-
-  Node solution = ArithMSum::solveEqualityFor(assumption, v);
-  if (solution.isNull())
-  {
-    // Could not solve for v
-    return false;
-  }
-
-  a = a.substitute(TNode(v), TNode(solution));
-  return checkEntailArith(a, strict);
-}
-
-bool SequencesRewriter::checkEntailArithWithAssumption(Node assumption,
-                                                       Node a,
-                                                       Node b,
-                                                       bool strict)
-{
-  Assert(Rewriter::rewrite(assumption) == assumption);
-
-  NodeManager* nm = NodeManager::currentNM();
-
-  if (!assumption.isConst() && assumption.getKind() != kind::EQUAL)
-  {
-    // We rewrite inequality assumptions from x <= y to x + (str.len s) = y
-    // where s is some fresh string variable. We use (str.len s) because
-    // (str.len s) must be non-negative for the equation to hold.
-    Node x, y;
-    if (assumption.getKind() == kind::GEQ)
-    {
-      x = assumption[0];
-      y = assumption[1];
-    }
-    else
-    {
-      // (not (>= s t)) --> (>= (t - 1) s)
-      Assert(assumption.getKind() == kind::NOT
-             && assumption[0].getKind() == kind::GEQ);
-      x = nm->mkNode(kind::MINUS, assumption[0][1], nm->mkConst(Rational(1)));
-      y = assumption[0][0];
-    }
-
-    Node s = nm->mkBoundVar("slackVal", nm->stringType());
-    Node slen = nm->mkNode(kind::STRING_LENGTH, s);
-    assumption = Rewriter::rewrite(
-        nm->mkNode(kind::EQUAL, x, nm->mkNode(kind::PLUS, y, slen)));
-  }
-
-  Node diff = nm->mkNode(kind::MINUS, a, b);
-  bool res = false;
-  if (assumption.isConst())
-  {
-    bool assumptionBool = assumption.getConst<bool>();
-    if (assumptionBool)
-    {
-      res = checkEntailArith(diff, strict);
-    }
-    else
-    {
-      res = true;
-    }
-  }
-  else
-  {
-    res = checkEntailArithWithEqAssumption(assumption, diff, strict);
-  }
-  return res;
-}
-
-bool SequencesRewriter::checkEntailArithWithAssumptions(
-    std::vector<Node> assumptions, Node a, Node b, bool strict)
-{
-  // TODO: We currently try to show the entailment with each assumption
-  // independently. In the future, we should make better use of multiple
-  // assumptions.
-  bool res = false;
-  for (const auto& assumption : assumptions)
-  {
-    Assert(Rewriter::rewrite(assumption) == assumption);
-
-    if (checkEntailArithWithAssumption(assumption, a, b, strict))
-    {
-      res = true;
-      break;
-    }
-  }
-  return res;
-}
-
-Node SequencesRewriter::getConstantArithBound(Node a, bool isLower)
-{
-  Assert(Rewriter::rewrite(a) == a);
-  Node ret;
-  if (a.isConst())
-  {
-    ret = a;
-  }
-  else if (a.getKind() == kind::STRING_LENGTH)
-  {
-    if (isLower)
-    {
-      ret = NodeManager::currentNM()->mkConst(Rational(0));
-    }
-  }
-  else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT)
-  {
-    std::vector<Node> children;
-    bool success = true;
-    for (unsigned i = 0; i < a.getNumChildren(); i++)
-    {
-      Node ac = getConstantArithBound(a[i], isLower);
-      if (ac.isNull())
-      {
-        ret = ac;
-        success = false;
-        break;
-      }
-      else
-      {
-        if (ac.getConst<Rational>().sgn() == 0)
-        {
-          if (a.getKind() == kind::MULT)
-          {
-            ret = ac;
-            success = false;
-            break;
-          }
-        }
-        else
-        {
-          if (a.getKind() == kind::MULT)
-          {
-            if ((ac.getConst<Rational>().sgn() > 0) != isLower)
-            {
-              ret = Node::null();
-              success = false;
-              break;
-            }
-          }
-          children.push_back(ac);
-        }
-      }
-    }
-    if (success)
-    {
-      if (children.empty())
-      {
-        ret = NodeManager::currentNM()->mkConst(Rational(0));
-      }
-      else if (children.size() == 1)
-      {
-        ret = children[0];
-      }
-      else
-      {
-        ret = NodeManager::currentNM()->mkNode(a.getKind(), children);
-        ret = Rewriter::rewrite(ret);
-      }
-    }
-  }
-  Trace("strings-rewrite-cbound")
-      << "Constant " << (isLower ? "lower" : "upper") << " bound for " << a
-      << " is " << ret << std::endl;
-  Assert(ret.isNull() || ret.isConst());
-  // entailment check should be at least as powerful as computing a lower bound
-  Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() < 0
-         || checkEntailArith(a, false));
-  Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() <= 0
-         || checkEntailArith(a, true));
-  return ret;
-}
-
-Node SequencesRewriter::getFixedLengthForRegexp(Node n)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  if (n.getKind() == STRING_TO_REGEXP)
-  {
-    Node ret = nm->mkNode(STRING_LENGTH, n[0]);
-    ret = Rewriter::rewrite(ret);
-    if (ret.isConst())
-    {
-      return ret;
-    }
-  }
-  else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
-  {
-    return nm->mkConst(Rational(1));
-  }
-  else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
-  {
-    Node ret;
-    for (const Node& nc : n)
-    {
-      Node flc = getFixedLengthForRegexp(nc);
-      if (flc.isNull() || (!ret.isNull() && ret != flc))
-      {
-        return Node::null();
-      }
-      else if (ret.isNull())
-      {
-        // first time
-        ret = flc;
+        return Node::null();
        }
+      std::vector<Node> snChildren;
+      utils::getConcat(sn, snChildren);
+      concatBuilder.append(snChildren);
      }
-    return ret;
+    res = concatBuilder.constructNode();
    }
-  else if (n.getKind() == REGEXP_CONCAT)
+  else if (len.getKind() == MULT && len.getNumChildren() == 2
+           && len[0].isConst())
    {
-    NodeBuilder<> nb(PLUS);
-    for (const Node& nc : n)
-    {
-      Node flc = getFixedLengthForRegexp(nc);
-      if (flc.isNull())
-      {
-        return flc;
-      }
-      nb << flc;
-    }
-    Node ret = nb.constructNode();
-    ret = Rewriter::rewrite(ret);
-    return ret;
-  }
-  return Node::null();
-}
+    // c * x -> norm(x) repeated c times
+    Rational ratReps = len[0].getConst<Rational>();
+    Assert(ratReps.getDenominator() == 1);
+    Integer intReps = ratReps.getNumerator();
  
-bool SequencesRewriter::checkEntailArithInternal(Node a)
-{
-  Assert(Rewriter::rewrite(a) == a);
-  // check whether a >= 0
-  if (a.isConst())
-  {
-    return a.getConst<Rational>().sgn() >= 0;
-  }
-  else if (a.getKind() == kind::STRING_LENGTH)
-  {
-    // str.len( t ) >= 0
-    return true;
-  }
-  else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT)
-  {
-    for (unsigned i = 0; i < a.getNumChildren(); i++)
+    Node nRep = canonicalStrForSymbolicLength(len[1], stype);
+    std::vector<Node> nRepChildren;
+    utils::getConcat(nRep, nRepChildren);
+    NodeBuilder<> concatBuilder(STRING_CONCAT);
+    for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
      {
-      if (!checkEntailArithInternal(a[i]))
-      {
-        return false;
-      }
+      concatBuilder.append(nRepChildren);
      }
-    // t1 >= 0 ^ ... ^ tn >= 0 => t1 op ... op tn >= 0
-    return true;
-  }
-
-  return false;
-}
-
-Node SequencesRewriter::decomposeSubstrChain(Node s,
-                                             std::vector<Node>& ss,
-                                             std::vector<Node>& ls)
-{
-  Assert(ss.empty());
-  Assert(ls.empty());
-  while (s.getKind() == STRING_SUBSTR)
-  {
-    ss.push_back(s[1]);
-    ls.push_back(s[2]);
-    s = s[0];
-  }
-  std::reverse(ss.begin(), ss.end());
-  std::reverse(ls.begin(), ls.end());
-  return s;
-}
-
-Node SequencesRewriter::mkSubstrChain(Node base,
-                                      const std::vector<Node>& ss,
-                                      const std::vector<Node>& ls)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  for (unsigned i = 0, size = ss.size(); i < size; i++)
-  {
-    base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
+    res = concatBuilder.constructNode();
    }
-  return base;
-}
-
-Node SequencesRewriter::getStringOrEmpty(Node n)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  Node res;
-  while (res.isNull())
+  else if (len.getKind() == STRING_LENGTH)
    {
-    switch (n.getKind())
-    {
-      case kind::STRING_STRREPL:
-      {
-        Node empty = nm->mkConst(::CVC4::String(""));
-        if (n[0] == empty)
-        {
-          // (str.replace "" x y) --> y
-          n = n[2];
-          break;
-        }
-
-        if (checkEntailLengthOne(n[0]) && n[2] == empty)
-        {
-          // (str.replace "A" x "") --> "A"
-          res = n[0];
-          break;
-        }
-
-        res = n;
-        break;
-      }
-      case kind::STRING_SUBSTR:
-      {
-        if (checkEntailLengthOne(n[0]))
-        {
-          // (str.substr "A" x y) --> "A"
-          res = n[0];
-          break;
-        }
-        res = n;
-        break;
-      }
-      default:
-      {
-        res = n;
-        break;
-      }
-    }
+    // len(x) -> x
+    res = len[0];
    }
    return res;
  }
  
-bool SequencesRewriter::inferZerosInSumGeq(Node x,
-                                           std::vector<Node>& ys,
-                                           std::vector<Node>& zeroYs)
-{
-  Assert(zeroYs.empty());
-
-  NodeManager* nm = NodeManager::currentNM();
-
-  // Check if we can show that y1 + ... + yn >= x
-  Node sum = (ys.size() > 1) ? nm->mkNode(PLUS, ys) : ys[0];
-  if (!checkEntailArith(sum, x))
-  {
-    return false;
-  }
-
-  // Try to remove yi one-by-one and check if we can still show:
-  //
-  // y1 + ... + yi-1 +  yi+1 + ... + yn >= x
-  //
-  // If that's the case, we know that yi can be zero and the inequality still
-  // holds.
-  size_t i = 0;
-  while (i < ys.size())
-  {
-    Node yi = ys[i];
-    std::vector<Node>::iterator pos = ys.erase(ys.begin() + i);
-    if (ys.size() > 1)
-    {
-      sum = nm->mkNode(PLUS, ys);
-    }
-    else
-    {
-      sum = ys.size() == 1 ? ys[0] : nm->mkConst(Rational(0));
-    }
-
-    if (checkEntailArith(sum, x))
-    {
-      zeroYs.push_back(yi);
-    }
-    else
-    {
-      ys.insert(pos, yi);
-      i++;
-    }
-  }
-  return true;
-}
-
-Node SequencesRewriter::inferEqsFromContains(Node x, Node y)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  Node emp = nm->mkConst(String(""));
-  Assert(x.getType() == y.getType());
-  TypeNode stype = x.getType();
-
-  Node xLen = nm->mkNode(STRING_LENGTH, x);
-  std::vector<Node> yLens;
-  if (y.getKind() != STRING_CONCAT)
-  {
-    yLens.push_back(nm->mkNode(STRING_LENGTH, y));
-  }
-  else
-  {
-    for (const Node& yi : y)
-    {
-      yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
-    }
-  }
-
-  std::vector<Node> zeroLens;
-  if (x == emp)
-  {
-    // If x is the empty string, then all ys must be empty, too, and we can
-    // skip the expensive checks. Note that this is just a performance
-    // optimization.
-    zeroLens.swap(yLens);
-  }
-  else
-  {
-    // Check if we can infer that str.len(x) <= str.len(y). If that is the
-    // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
-    // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
-    // true. The terms that can have length zero without making the inequality
-    // false must be all be empty if (str.contains x y) is true.
-    if (!inferZerosInSumGeq(xLen, yLens, zeroLens))
-    {
-      // We could not prove that the inequality holds
-      return Node::null();
-    }
-    else if (yLens.size() == y.getNumChildren())
-    {
-      // We could only prove that the inequality holds but not that any of the
-      // ys must be empty
-      return nm->mkNode(EQUAL, x, y);
-    }
-  }
-
-  if (y.getKind() != STRING_CONCAT)
-  {
-    if (zeroLens.size() == 1)
-    {
-      // y is not a concatenation and we found that it must be empty, so just
-      // return (= y "")
-      Assert(zeroLens[0][0] == y);
-      return nm->mkNode(EQUAL, y, emp);
-    }
-    else
-    {
-      Assert(yLens.size() == 1 && yLens[0][0] == y);
-      return nm->mkNode(EQUAL, x, y);
-    }
-  }
-
-  std::vector<Node> cs;
-  for (const Node& yiLen : yLens)
-  {
-    Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
-    cs.push_back(yiLen[0]);
-  }
-
-  NodeBuilder<> nb(AND);
-  // (= x (str.++ y1' ... ym'))
-  if (!cs.empty())
-  {
-    nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
-  }
-  // (= y1'' "") ... (= yk'' "")
-  for (const Node& zeroLen : zeroLens)
-  {
-    Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
-    nb << nm->mkNode(EQUAL, zeroLen[0], emp);
-  }
-
-  // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
-  return nb.constructNode();
-}
-
-std::pair<bool, std::vector<Node> > SequencesRewriter::collectEmptyEqs(Node x)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  Node empty = nm->mkConst(::CVC4::String(""));
-
-  // Collect the equalities of the form (= x "") (sorted)
-  std::set<TNode> emptyNodes;
-  bool allEmptyEqs = true;
-  if (x.getKind() == kind::EQUAL)
-  {
-    if (x[0] == empty)
-    {
-      emptyNodes.insert(x[1]);
-    }
-    else if (x[1] == empty)
-    {
-      emptyNodes.insert(x[0]);
-    }
-    else
-    {
-      allEmptyEqs = false;
-    }
-  }
-  else if (x.getKind() == kind::AND)
-  {
-    for (const Node& c : x)
-    {
-      if (c.getKind() == kind::EQUAL)
-      {
-        if (c[0] == empty)
-        {
-          emptyNodes.insert(c[1]);
-        }
-        else if (c[1] == empty)
-        {
-          emptyNodes.insert(c[0]);
-        }
-      }
-      else
-      {
-        allEmptyEqs = false;
-      }
-    }
-  }
-
-  if (emptyNodes.size() == 0)
-  {
-    allEmptyEqs = false;
-  }
-
-  return std::make_pair(
-      allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
-}
  
  Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r)
  {
@@ -5646,3 +3142,7 @@ Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r)
    }
    return ret;
  }
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h

index 0e5cd5705bc4b66b1257975e8fbaf9bf4b9be709..7391a7bd0f274dc96977b4ebe3d2e353d9def2ab 100644 (file)
--- a/src/theory/strings/sequences_rewriter.h
+++ b/src/theory/strings/sequences_rewriter.h
@@ -18,14 +18,11 @@
  #ifndef CVC4__THEORY__STRINGS__SEQUENCES_REWRITER_H
  #define CVC4__THEORY__STRINGS__SEQUENCES_REWRITER_H
  
-#include <climits>
-#include <utility>
  #include <vector>
  
-#include "expr/attribute.h"
+#include "expr/node.h"
  #include "theory/strings/rewrites.h"
  #include "theory/theory_rewriter.h"
-#include "theory/type_enumerator.h"
  
  namespace CVC4 {
  namespace theory {
@@ -34,69 +31,6 @@ namespace strings {
  class SequencesRewriter : public TheoryRewriter
  {
   protected:
-  /** simple regular expression consume
-   *
-   * This method is called when we are rewriting a membership of the form
-   *   s1 ++ ... ++ sn in r1 ++ ... ++ rm
-   * We have that mchildren consists of the strings s1...sn, and children
-   * consists of the regular expressions r1...rm.
-   *
-   * This method tries to strip off parts of the concatenation terms. It updates
-   * the vectors such that the resulting vectors are such that the membership
-   * mchildren[n'...n''] in children[m'...m''] is equivalent to the input
-   * membership. The argument dir indicates the direction to consider, where
-   * 0 means strip off the front, 1 off the back, and < 0 off of both.
-   *
-   * If this method returns the false node, then we have inferred that no
-   * string in the language of r1 ++ ... ++ rm is a prefix (when dir!=1) or
-   * suffix (when dir!=0) of s1 ++ ... ++ sn. Otherwise, it returns the null
-   * node.
-   *
-   * For example, given input
-   *   mchildren = { "ab", x }, children = { [["a"]], ([["cd"]])* } and dir = 0,
-   * this method updates:
-   *   mchildren = { "b", x }, children = { ("cd")* }
-   * and returns null.
-   *
-   * For example, given input
-   *   { x, "abb", x }, { [[x]], ["a"..."b"], allchar, [[y]], [[x]]} and dir=-1,
-   * this method updates:
-   *   { "b" }, { [[y]] }
-   * where [[.]] denotes str.to.re, and returns null.
-   *
-   * Notice that the above requirement for returning false is stronger than
-   * determining that s1 ++ ... ++ sn in r1 ++ ... ++ rm is equivalent to false.
-   * For example, for input "bb" in "b" ++ ( "a" )*, we do not return false
-   * since "b" is in the language of "b" ++ ( "a" )* and is a prefix of "bb".
-   * We do not return false even though the above membership is equivalent
-   * to false. We do this because the function is used e.g. to test whether a
-   * possible unrolling leads to a conflict. This is demonstrated by the
-   * following examples:
-   *
-   * For example, given input
-   *   { "bb", x }, { "b", ("a")* } and dir=-1,
-   * this method updates:
-   *   { "b" }, { ("a")* }
-   * and returns null.
-   *
-   * For example, given input
-   *   { "cb", x }, { "b", ("a")* } and dir=-1,
-   * this method leaves children and mchildren unchanged and returns false.
-   *
-   * Notice that based on this, we can determine that:
-   *   "cb" ++ x  in ( "b" ++ ("a")* )*
-   * is equivalent to false, whereas we cannot determine that:
-   *   "bb" ++ x  in ( "b" ++ ("a")* )*
-   * is equivalent to false.
-   */
-  static Node simpleRegexpConsume(std::vector<Node>& mchildren,
-                                  std::vector<Node>& children,
-                                  int dir = -1);
-  static bool isConstRegExp(TNode t);
-  static bool testConstStringInRegExp(CVC4::String& s,
-                                      unsigned int index_start,
-                                      TNode r);
-
    /** rewrite regular expression concatenation
     *
     * This is the entry point for post-rewriting applications of re.++.
@@ -159,12 +93,6 @@ class SequencesRewriter : public TheoryRewriter
     */
    static Node rewriteMembership(TNode node);
  
-  static bool hasEpsilonNode(TNode node);
-  /** check entail arithmetic internal
-   * Returns true if we can show a >= 0 always.
-   * a is in rewritten form.
-   */
-  static bool checkEntailArithInternal(Node a);
    /** rewrite string equality extended
     *
     * This method returns a formula that is equivalent to the equality between
@@ -292,233 +220,6 @@ class SequencesRewriter : public TheoryRewriter
     */
    static Node rewriteStringToCode(Node node);
  
-  static Node splitConstant(Node a, Node b, int& index, bool isRev);
-  /** can constant contain list
-   * return true if constant c can contain the list l in order
-   * firstc/lastc store which indices in l were used to determine the return
-   * value.
-   *   (This is typically used when this function returns false, for minimizing
-   * explanations)
-   *
-   * For example:
-   *   canConstantContainList( "abc", { x, "c", y } ) returns true
-   *     firstc/lastc are updated to 1/1
-   *   canConstantContainList( "abc", { x, "d", y } ) returns false
-   *     firstc/lastc are updated to 1/1
-   *   canConstantContainList( "abcdef", { x, "b", y, "a", z, "c", w }
-   *     returns false
-   *     firstc/lastc are updated to 1/3
-   *   canConstantContainList( "abcdef", { x, "b", y, "e", z, "c", w }
-   *     returns false
-   *     firstc/lastc are updated to 1/5
-   */
-  static bool canConstantContainList(Node c,
-                                     std::vector<Node>& l,
-                                     int& firstc,
-                                     int& lastc);
-  /** can constant contain concat
-   * same as above but with n = str.++( l ) instead of l
-   */
-  static bool canConstantContainConcat(Node c, Node n, int& firstc, int& lastc);
-
-  /** strip symbolic length
-   *
-   * This function strips off components of n1 whose length is less than
-   * or equal to argument curr, and stores them in nr. The direction
-   * dir determines whether the components are removed from the start
-   * or end of n1.
-   *
-   * In detail, this function updates n1 to n1' such that:
-   *   If dir=1,
-   *     n1 = str.++( nr, n1' )
-   *   If dir=-1
-   *     n1 = str.++( n1', nr )
-   * It updates curr to curr' such that:
-   *   curr' = curr - str.len( str.++( nr ) ), and
-   *   curr' >= 0
-   * where the latter fact is determined by checkArithEntail.
-   *
-   * This function returns true if n1 is modified.
-   *
-   * For example:
-   *
-   *  stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)+1 )
-   *    returns true
-   *    n1 is updated to { "bc", y }
-   *    nr is updated to { x, "a" }
-   *    curr is updated to 0   *
-   *
-   * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)-1 )
-   *    returns false
-   *
-   *  stripSymbolicLength( { y, "abc", x }, {}, 1, str.len(x)+1 )
-   *    returns false
-   *
-   *  stripSymbolicLength( { x, "abc", y }, {}, -1, 2*str.len(y)+4 )
-   *    returns true
-   *    n1 is updated to { x }
-   *    nr is updated to { "abc", y }
-   *    curr is updated to str.len(y)+1
-   */
-  static bool stripSymbolicLength(std::vector<Node>& n1,
-                                  std::vector<Node>& nr,
-                                  int dir,
-                                  Node& curr);
-  /** component contains
-   * This function is used when rewriting str.contains( t1, t2 ), where
-   * n1 is the vector form of t1
-   * n2 is the vector form of t2
-   *
-   * If this function returns n>=0 for some n, then
-   *    n1 = { x1...x{n-1} xn...x{n+s} x{n+s+1}...xm },
-   *    n2 = { y1...ys },
-   *    y1 is a suffix of xn,
-   *    y2...y{s-1} = x{n+1}...x{n+s-1}, and
-   *    ys is a prefix of x{n+s}
-   * Otherwise it returns -1.
-   *
-   * This function may update n1 if computeRemainder = true.
-   *   We maintain the invariant that the resulting value n1'
-   *   of n1 after this function is such that:
-   *     n1 = str.++( nb, n1', ne )
-   * The vectors nb and ne have the following properties.
-   * If computeRemainder = true, then
-   *   If remainderDir != -1, then
-   *     ne is { x{n+s}' x{n+s+1}...xm }
-   *     where x{n+s} = str.++( ys, x{n+s}' ).
-   *   If remainderDir != 1, then
-   *     nb is { x1, ..., x{n-1}, xn' }
-   *     where xn = str.++( xn', y1 ).
-   *
-   * For example:
-   *
-   * componentContains({ x, "abc", x }, { "b" }, {}, true, 0)
-   *   returns 1,
-   *   n1 is updated to { "b" },
-   *   nb is updated to { x, "a" },
-   *   ne is updated to { "c", x }
-   *
-   * componentContains({ x, "abc", x }, { "b" }, {}, true, 1)
-   *   returns 1,
-   *   n1 is updated to { x, "ab" },
-   *   ne is updated to { "c", x }
-   *
-   * componentContains({ y, z, "abc", x, "def" }, { "c", x, "de" }, {}, true, 1)
-   *   returns 2,
-   *   n1 is updated to { y, z, "abc", x, "de" },
-   *   ne is updated to { "f" }
-   *
-   * componentContains({ y, "abc", x, "def" }, { "c", x, "de" }, {}, true, -1)
-   *   returns 1,
-   *   n1 is updated to { "c", x, "def" },
-   *   nb is updated to { y, "ab" }
-   */
-  static int componentContains(std::vector<Node>& n1,
-                               std::vector<Node>& n2,
-                               std::vector<Node>& nb,
-                               std::vector<Node>& ne,
-                               bool computeRemainder = false,
-                               int remainderDir = 0);
-  /** component contains base
-   *
-   * This function is a helper for the above function.
-   *
-   * It returns true if n2 is contained in n1 with the following
-   * restrictions:
-   *   If dir=1, then n2 must be a suffix of n1.
-   *   If dir=-1, then n2 must be a prefix of n1.
-   *
-   * If computeRemainder is true, then n1rb and n1re are
-   * updated such that :
-   *   n1 = str.++( n1rb, n2, n1re )
-   * where a null value of n1rb and n1re indicates the
-   * empty string.
-   *
-   * For example:
-   *
-   * componentContainsBase("cabe", "ab", n1rb, n1re, 1, false)
-   *   returns false.
-   *
-   * componentContainsBase("cabe", "ab", n1rb, n1re, 0, true)
-   *   returns true,
-   *   n1rb is set to "c",
-   *   n1re is set to "e".
-   *
-   * componentContainsBase(y, str.substr(y,0,5), n1rb, n1re, -1, true)
-   *   returns true,
-   *   n1re is set to str.substr(y,5,str.len(y)).
-   *
-   *
-   * Notice that this function may return false when it cannot compute a
-   * remainder when it otherwise would have returned true. For example:
-   *
-   * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, false)
-   *   returns true.
-   *
-   * Hence, we know that str.substr(y,x,z) is contained in y. However:
-   *
-   * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, true)
-   *   returns false.
-   *
-   * The reason is since computeRemainder=true, it must be that
-   *   y = str.++( n1rb, str.substr(y,x,z), n1re )
-   * for some n1rb, n1re. However, to construct such n1rb, n1re would require
-   * e.g. the terms:
-   *   y = str.++( ite( x+z < 0 OR x < 0, "", str.substr(y,0,x) ),
-   *               str.substr(y,x,z),
-   *               ite( x+z < 0 OR x < 0, y, str.substr(y,x+z,len(y)) ) )
-   *
-   * Since we do not wish to introduce ITE terms in the rewriter, we instead
-   * return false, indicating that we cannot compute the remainder.
-   */
-  static bool componentContainsBase(
-      Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder);
-  /** strip constant endpoints
-   * This function is used when rewriting str.contains( t1, t2 ), where
-   * n1 is the vector form of t1
-   * n2 is the vector form of t2
-   *
-   * It modifies n1 to a new vector n1' such that:
-   * (1) str.contains( str.++( n1 ), str.++( n2 ) ) is equivalent to
-   * str.contains( str.++( n1' ), str.++( n2 ) )
-   * (2) str.++( n1 ) = str.++( nb, n1', ne )
-   *
-   * "dir" is the direction in which we can modify n1:
-   * if dir = 1, then we allow dropping components from the front of n1,
-   * if dir = -1, then we allow dropping components from the back of n1,
-   * if dir = 0, then we allow dropping components from either.
-   *
-   * It returns true if n1 is modified.
-   *
-   * For example:
-   * stripConstantEndpoints({ "ab", x, "de" }, { "c" }, {}, {}, 1)
-   *   returns true,
-   *   n1 is updated to { x, "de" }
-   *   nb is updated to { "ab" }
-   * stripConstantEndpoints({ "ab", x, "de" }, { "bd" }, {}, {}, 0)
-   *   returns true,
-   *   n1 is updated to { "b", x, "d" }
-   *   nb is updated to { "a" }
-   *   ne is updated to { "e" }
-   * stripConstantEndpoints({ "ad", substr("ccc",x,y) }, { "d" }, {}, {}, -1)
-   *   returns true,
-   *   n1 is updated to {"ad"}
-   *   ne is updated to { substr("ccc",x,y) }
-   */
-  static bool stripConstantEndpoints(std::vector<Node>& n1,
-                                     std::vector<Node>& n2,
-                                     std::vector<Node>& nb,
-                                     std::vector<Node>& ne,
-                                     int dir = 0);
-
-  /**
-   * Given a symbolic length n, returns the canonical string (of type stype)
-   * for that length. For example if n is constant, this function returns a
-   * string consisting of "A" repeated n times. Returns the null node if no such
-   * string exists.
-   */
-  static Node canonicalStrForSymbolicLength(Node n, TypeNode stype);
-
    /** length preserving rewrite
     *
     * Given input n, this returns a string n' whose length is equivalent to n.
@@ -528,289 +229,12 @@ class SequencesRewriter : public TheoryRewriter
    static Node lengthPreserveRewrite(Node n);
  
    /**
-   * Checks whether a string term `a` is entailed to contain or not contain a
-   * string term `b`.
-   *
-   * @param a The string that is checked whether it contains `b`
-   * @param b The string that is checked whether it is contained in `a`
-   * @param fullRewriter Determines whether the function can use the full
-   * rewriter or only `rewriteContains()` (useful for avoiding loops)
-   * @return true node if it can be shown that `a` contains `b`, false node if
-   * it can be shown that `a` does not contain `b`, null node otherwise
-   */
-  static Node checkEntailContains(Node a, Node b, bool fullRewriter = true);
-
-  /** entail non-empty
-   *
-   * Checks whether string a is entailed to be non-empty. Is equivalent to
-   * the call checkArithEntail( len( a ), true ).
-   */
-  static bool checkEntailNonEmpty(Node a);
-
-  /**
-   * Checks whether string has at most/exactly length one. Length one strings
-   * can be used for more aggressive rewriting because there is guaranteed that
-   * it cannot be overlap multiple components in a string concatenation.
-   *
-   * @param s The string to check
-   * @param strict If true, the string must have exactly length one, otherwise
-   * at most length one
-   * @return True if the string has at most/exactly length one, false otherwise
-   */
-  static bool checkEntailLengthOne(Node s, bool strict = false);
-
-  /** check arithmetic entailment equal
-   * Returns true if it is always the case that a = b.
-   */
-  static bool checkEntailArithEq(Node a, Node b);
-  /** check arithmetic entailment
-   * Returns true if it is always the case that a >= b,
-   * and a>b if strict is true.
-   */
-  static bool checkEntailArith(Node a, Node b, bool strict = false);
-  /** check arithmetic entailment
-   * Returns true if it is always the case that a >= 0.
-   */
-  static bool checkEntailArith(Node a, bool strict = false);
-  /** check arithmetic entailment with approximations
-   *
-   * Returns true if it is always the case that a >= 0. We expect that a is in
-   * rewritten form.
-   *
-   * This function uses "approximation" techniques that under-approximate
-   * the value of a for the purposes of showing the entailment holds. For
-   * example, given:
-   *   len( x ) - len( substr( y, 0, len( x ) ) )
-   * Since we know that len( substr( y, 0, len( x ) ) ) <= len( x ), the above
-   * term can be under-approximated as len( x ) - len( x ) = 0, which is >= 0,
-   * and thus the entailment len( x ) - len( substr( y, 0, len( x ) ) ) >= 0
-   * holds.
-   */
-  static bool checkEntailArithApprox(Node a);
-  /** Get arithmetic approximations
-   *
-   * This gets the (set of) arithmetic approximations for term a and stores
-   * them in approx. If isOverApprox is true, these are over-approximations
-   * for the value of a, otherwise, they are underapproximations. For example,
-   * an over-approximation for len( substr( y, n, m ) ) is m; an
-   * under-approximation for indexof( x, y, n ) is -1.
-   *
-   * Notice that this function is not generally recursive (although it may make
-   * a small bounded of recursive calls). Instead, it returns the shape
-   * of the approximations for a. For example, an under-approximation
-   * for the term len( replace( substr( x, 0, n ), y, z ) ) returned by this
-   * function might be len( substr( x, 0, n ) ) - len( y ), where we don't
-   * consider (recursively) the approximations for len( substr( x, 0, n ) ).
-   */
-  static void getArithApproximations(Node a,
-                                     std::vector<Node>& approx,
-                                     bool isOverApprox = false);
-
-  /**
-   * Checks whether it is always true that `a` is a strict subset of `b` in the
-   * multiset domain.
-   *
-   * Examples:
-   *
-   * a = (str.++ "A" x), b = (str.++ "A" x "B") ---> true
-   * a = (str.++ "A" x), b = (str.++ "B" x "AA") ---> true
-   * a = (str.++ "A" x), b = (str.++ "B" y "AA") ---> false
-   *
-   * @param a The term for which it should be checked if it is a strict subset
-   * of `b` in the multiset domain
-   * @param b The term for which it should be checked if it is a strict
-   * superset of `a` in the multiset domain
-   * @return True if it is always the case that `a` is a strict subset of `b`,
-   * false otherwise.
-   */
-  static bool checkEntailMultisetSubset(Node a, Node b);
-
-  /**
-   * Returns a character `c` if it is always the case that str.in.re(a, c*),
-   * i.e. if all possible values of `a` only consist of `c` characters, and the
-   * null node otherwise. If `a` is the empty string, the function returns an
-   * empty string.
-   *
-   * @param a The node to check for homogeneity
-   * @return If `a` is homogeneous, the only character that it may contain, the
-   * empty string if `a` is empty, and the null node otherwise
-   */
-  static Node checkEntailHomogeneousString(Node a);
-
-  /**
-   * Simplifies a given node `a` s.t. the result is a concatenation of string
-   * terms that can be interpreted as a multiset and which contains all
-   * multisets that `a` could form.
-   *
-   * Examples:
-   *
-   * (str.substr "AA" 0 n) ---> "AA"
-   * (str.replace "AAA" x "BB") ---> (str.++ "AAA" "BB")
-   *
-   * @param a The node to simplify
-   * @return A concatenation that can be interpreted as a multiset
-   */
-  static Node getMultisetApproximation(Node a);
-
-  /**
-   * Checks whether assumption |= a >= 0 (if strict is false) or
-   * assumption |= a > 0 (if strict is true), where assumption is an equality
-   * assumption. The assumption must be in rewritten form.
-   *
-   * Example:
-   *
-   * checkEntailArithWithEqAssumption(x + (str.len y) = 0, -x, false) = true
-   *
-   * Because: x = -(str.len y), so -x >= 0 --> (str.len y) >= 0 --> true
-   */
-  static bool checkEntailArithWithEqAssumption(Node assumption,
-                                               Node a,
-                                               bool strict = false);
-
-  /**
-   * Checks whether assumption |= a >= b (if strict is false) or
-   * assumption |= a > b (if strict is true). The function returns true if it
-   * can be shown that the entailment holds and false otherwise. Assumption
-   * must be in rewritten form. Assumption may be an equality or an inequality.
-   *
-   * Example:
-   *
-   * checkEntailArithWithAssumption(x + (str.len y) = 0, 0, x, false) = true
-   *
-   * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true
-   */
-  static bool checkEntailArithWithAssumption(Node assumption,
-                                             Node a,
-                                             Node b,
-                                             bool strict = false);
-
-  /**
-   * Checks whether assumptions |= a >= b (if strict is false) or
-   * assumptions |= a > b (if strict is true). The function returns true if it
-   * can be shown that the entailment holds and false otherwise. Assumptions
-   * must be in rewritten form. Assumptions may be an equalities or an
-   * inequalities.
-   *
-   * Example:
-   *
-   * checkEntailArithWithAssumptions([x + (str.len y) = 0], 0, x, false) = true
-   *
-   * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true
-   */
-  static bool checkEntailArithWithAssumptions(std::vector<Node> assumptions,
-                                              Node a,
-                                              Node b,
-                                              bool strict = false);
-
-  /** get arithmetic lower bound
-   * If this function returns a non-null Node ret,
-   * then ret is a rational constant and
-   * we know that n >= ret always if isLower is true,
-   * or n <= ret if isLower is false.
-   *
-   * Notice the following invariant.
-   * If getConstantArithBound(a, true) = ret where ret is non-null, then for
-   * strict = { true, false } :
-   *   ret >= strict ? 1 : 0
-   *     if and only if
-   *   checkEntailArith( a, strict ) = true.
-   */
-  static Node getConstantArithBound(Node a, bool isLower = true);
-  /** get length for regular expression
-   *
-   * Given regular expression n, if this method returns a non-null value c, then
-   * x in n entails len( x ) = c.
-   */
-  static Node getFixedLengthForRegexp(Node n);
-  /** decompose substr chain
-   *
-   * If s is substr( ... substr( base, x1, y1 ) ..., xn, yn ), then this
-   * function returns base, adds { x1 ... xn } to ss, and { y1 ... yn } to ls.
-   */
-  static Node decomposeSubstrChain(Node s,
-                                   std::vector<Node>& ss,
-                                   std::vector<Node>& ls);
-  /** make substr chain
-   *
-   * If ss is { x1 ... xn } and ls is { y1 ... yn }, this returns the term
-   * substr( ... substr( base, x1, y1 ) ..., xn, yn ).
-   */
-  static Node mkSubstrChain(Node base,
-                            const std::vector<Node>& ss,
-                            const std::vector<Node>& ls);
-
-  /**
-   * Overapproximates the possible values of node n. This overapproximation
-   * assumes that n can return a value x or the empty string and tries to find
-   * the simplest x such that this holds. In the general case, x is the same as
-   * the input n. This overapproximation can be used to sort terms with the
-   * same possible values in string concatenation for example.
-   *
-   * Example:
-   *
-   * getStringOrEmpty( (str.replace "" x y) ) --> y because (str.replace "" x y)
-   * either returns y or ""
-   *
-   * getStringOrEmpty( (str.substr "ABC" x y) ) --> (str.substr "ABC" x y)
-   * because the function could not compute a simpler
-   */
-  static Node getStringOrEmpty(Node n);
-
-  /**
-   * Given an inequality y1 + ... + yn >= x, removes operands yi s.t. the
-   * original inequality still holds. Returns true if the original inequality
-   * holds and false otherwise. The list of ys is modified to contain a subset
-   * of the original ys.
-   *
-   * Example:
-   *
-   * inferZerosInSumGeq( (str.len x), [ (str.len x), (str.len y), 1 ], [] )
-   * --> returns true with ys = [ (str.len x) ] and zeroYs = [ (str.len y), 1 ]
-   *     (can be used to rewrite the inequality to false)
-   *
-   * inferZerosInSumGeq( (str.len x), [ (str.len y) ], [] )
-   * --> returns false because it is not possible to show
-   *     str.len(y) >= str.len(x)
-   */
-  static bool inferZerosInSumGeq(Node x,
-                                 std::vector<Node>& ys,
-                                 std::vector<Node>& zeroYs);
-
-  /**
-   * Infers a conjunction of equalities that correspond to (str.contains x y)
-   * if it can show that the length of y is greater or equal to the length of
-   * x. If y is a concatentation, we get x = y1 ++ ... ++ yn, the conjunction
-   * is of the form:
-   *
-   * (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
-   *
-   * where each yi'' are yi that must be empty for (= x y) to hold and yi' are
-   * yi that the function could not infer anything about.  Returns a null node
-   * if the function cannot infer that str.len(y) >= str.len(x). Returns (= x
-   * y) if the function can infer that str.len(y) >= str.len(x) but cannot
-   * infer that any of the yi must be empty.
-   */
-  static Node inferEqsFromContains(Node x, Node y);
-
-  /**
-   * Collects equal-to-empty nodes from a conjunction or a single
-   * node. Returns a list of nodes that are compared to empty nodes
-   * and a boolean that indicates whether all nodes in the
-   * conjunction were a comparison with the empty node. The nodes in
-   * the list are sorted and duplicates removed.
-   *
-   * Examples:
-   *
-   * collectEmptyEqs( (= "" x) ) = { true, [x] }
-   * collectEmptyEqs( (and (= "" x) (= "" y)) ) = { true, [x, y] }
-   * collectEmptyEqs( (and (= "A" x) (= "" y) (= "" y)) ) = { false, [y] }
-   *
-   * @param x The conjunction of equalities or a single equality
-   * @return A pair of a boolean that indicates whether the
-   * conjunction consists only of comparisons to the empty string
-   * and the list of nodes that are compared to the empty string
+   * Given a symbolic length n, returns the canonical string (of type stype)
+   * for that length. For example if n is constant, this function returns a
+   * string consisting of "A" repeated n times. Returns the null node if no such
+   * string exists.
     */
-  static std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x);
+  static Node canonicalStrForSymbolicLength(Node n, TypeNode stype);
  }; /* class SequencesRewriter */
  
  }  // namespace strings
diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp

index 7396a5013a0c416e59342d9347ea94be92033738..bdce86d0608e46fe150bb7b9a9ee3cb5d1f2c0d8 100644 (file)
--- a/src/theory/strings/skolem_cache.cpp
+++ b/src/theory/strings/skolem_cache.cpp
@@ -15,7 +15,7 @@
  #include "theory/strings/skolem_cache.h"
  
  #include "theory/rewriter.h"
-#include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/arith_entail.h"
  #include "util/rational.h"
  
  using namespace CVC4::kind;
@@ -163,8 +163,8 @@ SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b)
        a = s;
        b = m;
      }
-    else if (SequencesRewriter::checkEntailArith(nm->mkNode(PLUS, n, m),
-                                                 nm->mkNode(STRING_LENGTH, s)))
+    else if (ArithEntail::check(nm->mkNode(PLUS, n, m),
+                                nm->mkNode(STRING_LENGTH, s)))
      {
        // SK_PURIFY((str.substr x n m)) ---> SK_SUFFIX_REM(x, n)
        // if n + m >= (str.len x)
diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp

new file mode 100644 (file)

index 0000000..a1abfab
--- /dev/null
+++ b/src/theory/strings/strings_entail.cpp
@@ -0,0 +1,994 @@
+/*********************                                                        */
+/*! \file strings_entail.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds, Andres Noetzli
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of entailment tests involving strings.
+ **/
+
+#include "theory/strings/strings_entail.h"
+
+#include "expr/node_builder.h"
+#include "theory/rewriter.h"
+#include "theory/strings/arith_entail.h"
+#include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
+
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+bool StringsEntail::canConstantContainConcat(Node c,
+                                             Node n,
+                                             int& firstc,
+                                             int& lastc)
+{
+  Assert(c.isConst());
+  CVC4::String t = c.getConst<String>();
+  const std::vector<unsigned>& tvec = t.getVec();
+  Assert(n.getKind() == STRING_CONCAT);
+  // must find constant components in order
+  size_t pos = 0;
+  firstc = -1;
+  lastc = -1;
+  for (unsigned i = 0; i < n.getNumChildren(); i++)
+  {
+    if (n[i].isConst())
+    {
+      firstc = firstc == -1 ? i : firstc;
+      lastc = i;
+      CVC4::String s = n[i].getConst<String>();
+      size_t new_pos = t.find(s, pos);
+      if (new_pos == std::string::npos)
+      {
+        return false;
+      }
+      else
+      {
+        pos = new_pos + s.size();
+      }
+    }
+    else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
+    {
+      // find the first occurrence of a digit starting at pos
+      while (pos < tvec.size() && !String::isDigit(tvec[pos]))
+      {
+        pos++;
+      }
+      if (pos == tvec.size())
+      {
+        return false;
+      }
+      // must consume at least one digit here
+      pos++;
+    }
+  }
+  return true;
+}
+
+bool StringsEntail::canConstantContainList(Node c,
+                                           std::vector<Node>& l,
+                                           int& firstc,
+                                           int& lastc)
+{
+  Assert(c.isConst());
+  // must find constant components in order
+  size_t pos = 0;
+  firstc = -1;
+  lastc = -1;
+  for (unsigned i = 0; i < l.size(); i++)
+  {
+    if (l[i].isConst())
+    {
+      firstc = firstc == -1 ? i : firstc;
+      lastc = i;
+      size_t new_pos = Word::find(c, l[i], pos);
+      if (new_pos == std::string::npos)
+      {
+        return false;
+      }
+      else
+      {
+        pos = new_pos + Word::getLength(l[i]);
+      }
+    }
+  }
+  return true;
+}
+
+bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
+                                        std::vector<Node>& nr,
+                                        int dir,
+                                        Node& curr)
+{
+  Assert(dir == 1 || dir == -1);
+  Assert(nr.empty());
+  Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
+  bool ret = false;
+  bool success;
+  unsigned sindex = 0;
+  do
+  {
+    Assert(!curr.isNull());
+    success = false;
+    if (curr != zero && sindex < n1.size())
+    {
+      unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
+      if (n1[sindex_use].isConst())
+      {
+        // could strip part of a constant
+        Node lowerBound =
+            ArithEntail::getConstantBound(Rewriter::rewrite(curr));
+        if (!lowerBound.isNull())
+        {
+          Assert(lowerBound.isConst());
+          Rational lbr = lowerBound.getConst<Rational>();
+          if (lbr.sgn() > 0)
+          {
+            Assert(ArithEntail::check(curr, true));
+            CVC4::String s = n1[sindex_use].getConst<String>();
+            Node ncl =
+                NodeManager::currentNM()->mkConst(CVC4::Rational(s.size()));
+            Node next_s =
+                NodeManager::currentNM()->mkNode(MINUS, lowerBound, ncl);
+            next_s = Rewriter::rewrite(next_s);
+            Assert(next_s.isConst());
+            // we can remove the entire constant
+            if (next_s.getConst<Rational>().sgn() >= 0)
+            {
+              curr = Rewriter::rewrite(
+                  NodeManager::currentNM()->mkNode(MINUS, curr, ncl));
+              success = true;
+              sindex++;
+            }
+            else
+            {
+              // we can remove part of the constant
+              // lower bound minus the length of a concrete string is negative,
+              // hence lowerBound cannot be larger than long max
+              Assert(lbr < Rational(String::maxSize()));
+              curr = Rewriter::rewrite(
+                  NodeManager::currentNM()->mkNode(MINUS, curr, lowerBound));
+              uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
+              Assert(lbsize < s.size());
+              if (dir == 1)
+              {
+                // strip partially from the front
+                nr.push_back(
+                    NodeManager::currentNM()->mkConst(s.prefix(lbsize)));
+                n1[sindex_use] = NodeManager::currentNM()->mkConst(
+                    s.suffix(s.size() - lbsize));
+              }
+              else
+              {
+                // strip partially from the back
+                nr.push_back(
+                    NodeManager::currentNM()->mkConst(s.suffix(lbsize)));
+                n1[sindex_use] = NodeManager::currentNM()->mkConst(
+                    s.prefix(s.size() - lbsize));
+              }
+              ret = true;
+            }
+            Assert(ArithEntail::check(curr));
+          }
+          else
+          {
+            // we cannot remove the constant
+          }
+        }
+      }
+      else
+      {
+        Node next_s = NodeManager::currentNM()->mkNode(
+            MINUS,
+            curr,
+            NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
+        next_s = Rewriter::rewrite(next_s);
+        if (ArithEntail::check(next_s))
+        {
+          success = true;
+          curr = next_s;
+          sindex++;
+        }
+      }
+    }
+  } while (success);
+  if (sindex > 0)
+  {
+    if (dir == 1)
+    {
+      nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
+      n1.erase(n1.begin(), n1.begin() + sindex);
+    }
+    else
+    {
+      nr.insert(nr.end(), n1.end() - sindex, n1.end());
+      n1.erase(n1.end() - sindex, n1.end());
+    }
+    ret = true;
+  }
+  return ret;
+}
+
+int StringsEntail::componentContains(std::vector<Node>& n1,
+                                     std::vector<Node>& n2,
+                                     std::vector<Node>& nb,
+                                     std::vector<Node>& ne,
+                                     bool computeRemainder,
+                                     int remainderDir)
+{
+  Assert(nb.empty());
+  Assert(ne.empty());
+  // if n2 is a singleton, we can do optimized version here
+  if (n2.size() == 1)
+  {
+    for (unsigned i = 0; i < n1.size(); i++)
+    {
+      Node n1rb;
+      Node n1re;
+      if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
+      {
+        if (computeRemainder)
+        {
+          n1[i] = n2[0];
+          if (remainderDir != -1)
+          {
+            if (!n1re.isNull())
+            {
+              ne.push_back(n1re);
+            }
+            ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
+            n1.erase(n1.begin() + i + 1, n1.end());
+          }
+          else if (!n1re.isNull())
+          {
+            n1[i] = Rewriter::rewrite(
+                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
+          }
+          if (remainderDir != 1)
+          {
+            nb.insert(nb.end(), n1.begin(), n1.begin() + i);
+            n1.erase(n1.begin(), n1.begin() + i);
+            if (!n1rb.isNull())
+            {
+              nb.push_back(n1rb);
+            }
+          }
+          else if (!n1rb.isNull())
+          {
+            n1[i] = Rewriter::rewrite(
+                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
+          }
+        }
+        return i;
+      }
+    }
+  }
+  else if (n1.size() >= n2.size())
+  {
+    unsigned diff = n1.size() - n2.size();
+    for (unsigned i = 0; i <= diff; i++)
+    {
+      Node n1rb_first;
+      Node n1re_first;
+      // first component of n2 must be a suffix
+      if (componentContainsBase(n1[i],
+                                n2[0],
+                                n1rb_first,
+                                n1re_first,
+                                1,
+                                computeRemainder && remainderDir != 1))
+      {
+        Assert(n1re_first.isNull());
+        for (unsigned j = 1; j < n2.size(); j++)
+        {
+          // are we in the last component?
+          if (j + 1 == n2.size())
+          {
+            Node n1rb_last;
+            Node n1re_last;
+            // last component of n2 must be a prefix
+            if (componentContainsBase(n1[i + j],
+                                      n2[j],
+                                      n1rb_last,
+                                      n1re_last,
+                                      -1,
+                                      computeRemainder && remainderDir != -1))
+            {
+              Assert(n1rb_last.isNull());
+              if (computeRemainder)
+              {
+                if (remainderDir != -1)
+                {
+                  if (!n1re_last.isNull())
+                  {
+                    ne.push_back(n1re_last);
+                  }
+                  ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
+                  n1.erase(n1.begin() + i + j + 1, n1.end());
+                  n1[i + j] = n2[j];
+                }
+                if (remainderDir != 1)
+                {
+                  n1[i] = n2[0];
+                  nb.insert(nb.end(), n1.begin(), n1.begin() + i);
+                  n1.erase(n1.begin(), n1.begin() + i);
+                  if (!n1rb_first.isNull())
+                  {
+                    nb.push_back(n1rb_first);
+                  }
+                }
+              }
+              return i;
+            }
+            else
+            {
+              break;
+            }
+          }
+          else if (n1[i + j] != n2[j])
+          {
+            break;
+          }
+        }
+      }
+    }
+  }
+  return -1;
+}
+
+bool StringsEntail::componentContainsBase(
+    Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
+{
+  Assert(n1rb.isNull());
+  Assert(n1re.isNull());
+
+  NodeManager* nm = NodeManager::currentNM();
+
+  if (n1 == n2)
+  {
+    return true;
+  }
+  else
+  {
+    if (n1.isConst() && n2.isConst())
+    {
+      size_t len1 = Word::getLength(n1);
+      size_t len2 = Word::getLength(n2);
+      if (len2 < len1)
+      {
+        if (dir == 1)
+        {
+          if (Word::suffix(n1, len2) == n2)
+          {
+            if (computeRemainder)
+            {
+              n1rb = Word::prefix(n1, len1 - len2);
+            }
+            return true;
+          }
+        }
+        else if (dir == -1)
+        {
+          if (Word::prefix(n1, len2) == n2)
+          {
+            if (computeRemainder)
+            {
+              n1re = Word::suffix(n1, len1 - len2);
+            }
+            return true;
+          }
+        }
+        else
+        {
+          size_t f = Word::find(n1, n2);
+          if (f != std::string::npos)
+          {
+            if (computeRemainder)
+            {
+              if (f > 0)
+              {
+                n1rb = Word::prefix(n1, f);
+              }
+              if (len1 > f + len2)
+              {
+                n1re = Word::suffix(n1, len1 - (f + len2));
+              }
+            }
+            return true;
+          }
+        }
+      }
+    }
+    else
+    {
+      // cases for:
+      //   n1 = x   containing   n2 = substr( x, n2[1], n2[2] )
+      if (n2.getKind() == STRING_SUBSTR)
+      {
+        if (n2[0] == n1)
+        {
+          bool success = true;
+          Node start_pos = n2[1];
+          Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
+          Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
+          if (dir == 1)
+          {
+            // To be a suffix, start + length must be greater than
+            // or equal to the length of the string.
+            success = ArithEntail::check(end_pos, len_n2s);
+          }
+          else if (dir == -1)
+          {
+            // To be a prefix, must literally start at 0, since
+            //   if we knew it started at <0, it should be rewritten to "",
+            //   if we knew it started at 0, then n2[1] should be rewritten to
+            //   0.
+            success = start_pos.isConst()
+                      && start_pos.getConst<Rational>().sgn() == 0;
+          }
+          if (success)
+          {
+            if (computeRemainder)
+            {
+              // we can only compute the remainder if start_pos and end_pos
+              // are known to be non-negative.
+              if (!ArithEntail::check(start_pos)
+                  || !ArithEntail::check(end_pos))
+              {
+                return false;
+              }
+              if (dir != 1)
+              {
+                n1rb = nm->mkNode(
+                    STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
+              }
+              if (dir != -1)
+              {
+                n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
+              }
+            }
+            return true;
+          }
+        }
+      }
+
+      if (!computeRemainder && dir == 0)
+      {
+        if (n1.getKind() == STRING_STRREPL)
+        {
+          // (str.contains (str.replace x y z) w) ---> true
+          // if (str.contains x w) --> true and (str.contains z w) ---> true
+          Node xCtnW = StringsEntail::checkContains(n1[0], n2);
+          if (!xCtnW.isNull() && xCtnW.getConst<bool>())
+          {
+            Node zCtnW = StringsEntail::checkContains(n1[2], n2);
+            if (!zCtnW.isNull() && zCtnW.getConst<bool>())
+            {
+              return true;
+            }
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
+bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
+                                           std::vector<Node>& n2,
+                                           std::vector<Node>& nb,
+                                           std::vector<Node>& ne,
+                                           int dir)
+{
+  Assert(nb.empty());
+  Assert(ne.empty());
+
+  NodeManager* nm = NodeManager::currentNM();
+  bool changed = false;
+  // for ( forwards, backwards ) direction
+  for (unsigned r = 0; r < 2; r++)
+  {
+    if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
+    {
+      unsigned index0 = r == 0 ? 0 : n1.size() - 1;
+      unsigned index1 = r == 0 ? 0 : n2.size() - 1;
+      bool removeComponent = false;
+      Node n1cmp = n1[index0];
+
+      if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0)
+      {
+        return false;
+      }
+
+      std::vector<Node> sss;
+      std::vector<Node> sls;
+      n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
+      Trace("strings-rewrite-debug2")
+          << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
+          << ", dir = " << dir << std::endl;
+      if (n1cmp.isConst())
+      {
+        CVC4::String s = n1cmp.getConst<String>();
+        // overlap is an overapproximation of the number of characters
+        // n2[index1] can match in s
+        unsigned overlap = s.size();
+        if (n2[index1].isConst())
+        {
+          CVC4::String t = n2[index1].getConst<String>();
+          std::size_t ret = r == 0 ? s.find(t) : s.rfind(t);
+          if (ret == std::string::npos)
+          {
+            if (n1.size() == 1)
+            {
+              // can remove everything
+              //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
+              //   str.contains( "", str.++( "ba", x ) )
+              removeComponent = true;
+            }
+            else if (sss.empty())  // only if not substr
+            {
+              // check how much overlap there is
+              // This is used to partially strip off the endpoint
+              // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
+              // str.contains( str.++( "c", x ), str.++( "cd", y ) )
+              overlap = r == 0 ? s.overlap(t) : t.overlap(s);
+            }
+            else
+            {
+              // if we are looking at a substring, we can remove the component
+              // if there is no overlap
+              //   e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
+              //        --> str.contains( x, "a" )
+              removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
+            }
+          }
+          else if (sss.empty())  // only if not substr
+          {
+            Assert(ret < s.size());
+            // can strip off up to the find position, e.g.
+            // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
+            // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
+            // and
+            // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
+            // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
+            overlap = s.size() - ret;
+          }
+        }
+        else
+        {
+          // inconclusive
+        }
+        // process the overlap
+        if (overlap < s.size())
+        {
+          changed = true;
+          if (overlap == 0)
+          {
+            removeComponent = true;
+          }
+          else
+          {
+            // can drop the prefix (resp. suffix) from the first (resp. last)
+            // component
+            if (r == 0)
+            {
+              nb.push_back(nm->mkConst(s.prefix(s.size() - overlap)));
+              n1[index0] = nm->mkConst(s.suffix(overlap));
+            }
+            else
+            {
+              ne.push_back(nm->mkConst(s.suffix(s.size() - overlap)));
+              n1[index0] = nm->mkConst(s.prefix(overlap));
+            }
+          }
+        }
+      }
+      else if (n1cmp.getKind() == STRING_ITOS)
+      {
+        if (n2[index1].isConst())
+        {
+          CVC4::String t = n2[index1].getConst<String>();
+
+          if (n1.size() == 1)
+          {
+            // if n1.size()==1, then if n2[index1] is not a number, we can drop
+            // the entire component
+            //    e.g. str.contains( int.to.str(x), "123a45") --> false
+            if (!t.isNumber())
+            {
+              removeComponent = true;
+            }
+          }
+          else
+          {
+            const std::vector<unsigned>& tvec = t.getVec();
+            Assert(tvec.size() > 0);
+
+            // if n1.size()>1, then if the first (resp. last) character of
+            // n2[index1]
+            //  is not a digit, we can drop the entire component, e.g.:
+            //    str.contains( str.++( int.to.str(x), y ), "a12") -->
+            //    str.contains( y, "a12" )
+            //    str.contains( str.++( y, int.to.str(x) ), "a0b") -->
+            //    str.contains( y, "a0b" )
+            unsigned i = r == 0 ? 0 : (tvec.size() - 1);
+            if (!String::isDigit(tvec[i]))
+            {
+              removeComponent = true;
+            }
+          }
+        }
+      }
+      if (removeComponent)
+      {
+        // can drop entire first (resp. last) component
+        if (r == 0)
+        {
+          nb.push_back(n1[index0]);
+          n1.erase(n1.begin(), n1.begin() + 1);
+        }
+        else
+        {
+          ne.push_back(n1[index0]);
+          n1.pop_back();
+        }
+        if (n1.empty())
+        {
+          // if we've removed everything, just return (we will rewrite to false)
+          return true;
+        }
+        else
+        {
+          changed = true;
+        }
+      }
+    }
+  }
+  // TODO (#1180) : computing the maximal overlap in this function may be
+  // important.
+  // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
+  // false
+  //   ...since str.to.int(x) can contain at most 1 character from "2aaaa",
+  //   leaving 4 characters
+  //      which is larger that the upper bound for length of str.substr(y,0,3),
+  //      which is 3.
+  return changed;
+}
+
+Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  Node ctn = nm->mkNode(STRING_STRCTN, a, b);
+
+  if (fullRewriter)
+  {
+    ctn = Rewriter::rewrite(ctn);
+  }
+  else
+  {
+    Node prev;
+    do
+    {
+      prev = ctn;
+      ctn = SequencesRewriter::rewriteContains(ctn);
+    } while (prev != ctn && ctn.getKind() == STRING_STRCTN);
+  }
+
+  Assert(ctn.getType().isBoolean());
+  return ctn.isConst() ? ctn : Node::null();
+}
+
+bool StringsEntail::checkNonEmpty(Node a)
+{
+  Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
+  len = Rewriter::rewrite(len);
+  return ArithEntail::check(len, true);
+}
+
+bool StringsEntail::checkLengthOne(Node s, bool strict)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  Node one = nm->mkConst(Rational(1));
+  Node len = nm->mkNode(STRING_LENGTH, s);
+  len = Rewriter::rewrite(len);
+  return ArithEntail::check(one, len)
+         && (!strict || ArithEntail::check(len, true));
+}
+
+bool StringsEntail::checkMultisetSubset(Node a, Node b)
+{
+  std::vector<Node> avec;
+  utils::getConcat(getMultisetApproximation(a), avec);
+  std::vector<Node> bvec;
+  utils::getConcat(b, bvec);
+
+  std::map<Node, unsigned> num_nconst[2];
+  std::map<Node, unsigned> num_const[2];
+  for (unsigned j = 0; j < 2; j++)
+  {
+    std::vector<Node>& jvec = j == 0 ? avec : bvec;
+    for (const Node& cc : jvec)
+    {
+      if (cc.isConst())
+      {
+        num_const[j][cc]++;
+      }
+      else
+      {
+        num_nconst[j][cc]++;
+      }
+    }
+  }
+  bool ms_success = true;
+  for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
+  {
+    if (nncp.second > num_nconst[1][nncp.first])
+    {
+      ms_success = false;
+      break;
+    }
+  }
+  if (ms_success)
+  {
+    // count the number of constant characters in the first argument
+    std::map<Node, unsigned> count_const[2];
+    std::vector<Node> chars;
+    for (unsigned j = 0; j < 2; j++)
+    {
+      for (std::pair<const Node, unsigned>& ncp : num_const[j])
+      {
+        Node cn = ncp.first;
+        Assert(cn.isConst());
+        std::vector<Node> cnChars = Word::getChars(cn);
+        for (const Node& ch : cnChars)
+        {
+          count_const[j][ch] += ncp.second;
+          if (std::find(chars.begin(), chars.end(), ch) == chars.end())
+          {
+            chars.push_back(ch);
+          }
+        }
+      }
+    }
+    Trace("strings-entail-ms-ss")
+        << "For " << a << " and " << b << " : " << std::endl;
+    for (const Node& ch : chars)
+    {
+      Trace("strings-entail-ms-ss") << "  # occurrences of substring ";
+      Trace("strings-entail-ms-ss") << ch << " in arguments is ";
+      Trace("strings-entail-ms-ss")
+          << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
+      if (count_const[0][ch] < count_const[1][ch])
+      {
+        return true;
+      }
+    }
+
+    // TODO (#1180): count the number of 2,3,4,.. character substrings
+    // for example:
+    // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
+    // since the second argument contains more occurrences of "bb".
+    // note this is orthogonal reasoning to inductive reasoning
+    // via regular membership reduction in Liang et al CAV 2015.
+  }
+  return false;
+}
+
+Node StringsEntail::checkHomogeneousString(Node a)
+{
+  std::vector<Node> avec;
+  utils::getConcat(getMultisetApproximation(a), avec);
+
+  bool cValid = false;
+  Node c;
+  for (const Node& ac : avec)
+  {
+    if (ac.isConst())
+    {
+      std::vector<Node> acv = Word::getChars(ac);
+      for (const Node& cc : acv)
+      {
+        if (!cValid)
+        {
+          cValid = true;
+          c = cc;
+        }
+        else if (c != cc)
+        {
+          // Found a different character
+          return Node::null();
+        }
+      }
+    }
+    else
+    {
+      // Could produce a different character
+      return Node::null();
+    }
+  }
+
+  if (!cValid)
+  {
+    return Word::mkEmptyWord(a.getType());
+  }
+
+  return c;
+}
+
+Node StringsEntail::getMultisetApproximation(Node a)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  if (a.getKind() == STRING_SUBSTR)
+  {
+    return a[0];
+  }
+  else if (a.getKind() == STRING_STRREPL)
+  {
+    return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
+  }
+  else if (a.getKind() == STRING_CONCAT)
+  {
+    NodeBuilder<> nb(STRING_CONCAT);
+    for (const Node& ac : a)
+    {
+      nb << getMultisetApproximation(ac);
+    }
+    return nb.constructNode();
+  }
+  else
+  {
+    return a;
+  }
+}
+
+Node StringsEntail::getStringOrEmpty(Node n)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  Node res;
+  while (res.isNull())
+  {
+    switch (n.getKind())
+    {
+      case STRING_STRREPL:
+      {
+        Node empty = nm->mkConst(::CVC4::String(""));
+        if (n[0] == empty)
+        {
+          // (str.replace "" x y) --> y
+          n = n[2];
+          break;
+        }
+
+        if (checkLengthOne(n[0]) && n[2] == empty)
+        {
+          // (str.replace "A" x "") --> "A"
+          res = n[0];
+          break;
+        }
+
+        res = n;
+        break;
+      }
+      case STRING_SUBSTR:
+      {
+        if (checkLengthOne(n[0]))
+        {
+          // (str.substr "A" x y) --> "A"
+          res = n[0];
+          break;
+        }
+        res = n;
+        break;
+      }
+      default:
+      {
+        res = n;
+        break;
+      }
+    }
+  }
+  return res;
+}
+
+Node StringsEntail::inferEqsFromContains(Node x, Node y)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  Node emp = nm->mkConst(String(""));
+  Assert(x.getType() == y.getType());
+  TypeNode stype = x.getType();
+
+  Node xLen = nm->mkNode(STRING_LENGTH, x);
+  std::vector<Node> yLens;
+  if (y.getKind() != STRING_CONCAT)
+  {
+    yLens.push_back(nm->mkNode(STRING_LENGTH, y));
+  }
+  else
+  {
+    for (const Node& yi : y)
+    {
+      yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
+    }
+  }
+
+  std::vector<Node> zeroLens;
+  if (x == emp)
+  {
+    // If x is the empty string, then all ys must be empty, too, and we can
+    // skip the expensive checks. Note that this is just a performance
+    // optimization.
+    zeroLens.swap(yLens);
+  }
+  else
+  {
+    // Check if we can infer that str.len(x) <= str.len(y). If that is the
+    // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
+    // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
+    // true. The terms that can have length zero without making the inequality
+    // false must be all be empty if (str.contains x y) is true.
+    if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens))
+    {
+      // We could not prove that the inequality holds
+      return Node::null();
+    }
+    else if (yLens.size() == y.getNumChildren())
+    {
+      // We could only prove that the inequality holds but not that any of the
+      // ys must be empty
+      return nm->mkNode(EQUAL, x, y);
+    }
+  }
+
+  if (y.getKind() != STRING_CONCAT)
+  {
+    if (zeroLens.size() == 1)
+    {
+      // y is not a concatenation and we found that it must be empty, so just
+      // return (= y "")
+      Assert(zeroLens[0][0] == y);
+      return nm->mkNode(EQUAL, y, emp);
+    }
+    else
+    {
+      Assert(yLens.size() == 1 && yLens[0][0] == y);
+      return nm->mkNode(EQUAL, x, y);
+    }
+  }
+
+  std::vector<Node> cs;
+  for (const Node& yiLen : yLens)
+  {
+    Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
+    cs.push_back(yiLen[0]);
+  }
+
+  NodeBuilder<> nb(AND);
+  // (= x (str.++ y1' ... ym'))
+  if (!cs.empty())
+  {
+    nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
+  }
+  // (= y1'' "") ... (= yk'' "")
+  for (const Node& zeroLen : zeroLens)
+  {
+    Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
+    nb << nm->mkNode(EQUAL, zeroLen[0], emp);
+  }
+
+  // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
+  return nb.constructNode();
+}
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
diff --git a/src/theory/strings/strings_entail.h b/src/theory/strings/strings_entail.h

new file mode 100644 (file)

index 0000000..d4993fa
--- /dev/null
+++ b/src/theory/strings/strings_entail.h
@@ -0,0 +1,371 @@
+/*********************                                                        */
+/*! \file strings_entail.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds, Andres Noetzli
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Entailment tests involving strings
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__STRINGS__STRING_ENTAIL_H
+#define CVC4__THEORY__STRINGS__STRING_ENTAIL_H
+
+#include <vector>
+
+#include "expr/node.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+/**
+ * Entailment tests involving strings.
+ * Some of these techniques are described in Reynolds et al, "High Level
+ * Abstractions for Simplifying Extended String Constraints in SMT", CAV 2019.
+ */
+class StringsEntail
+{
+ public:
+  /** can constant contain list
+   * return true if constant c can contain the list l in order
+   * firstc/lastc store which indices in l were used to determine the return
+   * value.
+   *   (This is typically used when this function returns false, for minimizing
+   * explanations)
+   *
+   * For example:
+   *   canConstantContainList( "abc", { x, "c", y } ) returns true
+   *     firstc/lastc are updated to 1/1
+   *   canConstantContainList( "abc", { x, "d", y } ) returns false
+   *     firstc/lastc are updated to 1/1
+   *   canConstantContainList( "abcdef", { x, "b", y, "a", z, "c", w }
+   *     returns false
+   *     firstc/lastc are updated to 1/3
+   *   canConstantContainList( "abcdef", { x, "b", y, "e", z, "c", w }
+   *     returns false
+   *     firstc/lastc are updated to 1/5
+   */
+  static bool canConstantContainList(Node c,
+                                     std::vector<Node>& l,
+                                     int& firstc,
+                                     int& lastc);
+  /** can constant contain concat
+   * same as above but with n = str.++( l ) instead of l
+   */
+  static bool canConstantContainConcat(Node c, Node n, int& firstc, int& lastc);
+
+  /** strip symbolic length
+   *
+   * This function strips off components of n1 whose length is less than
+   * or equal to argument curr, and stores them in nr. The direction
+   * dir determines whether the components are removed from the start
+   * or end of n1.
+   *
+   * In detail, this function updates n1 to n1' such that:
+   *   If dir=1,
+   *     n1 = str.++( nr, n1' )
+   *   If dir=-1
+   *     n1 = str.++( n1', nr )
+   * It updates curr to curr' such that:
+   *   curr' = curr - str.len( str.++( nr ) ), and
+   *   curr' >= 0
+   * where the latter fact is determined by checkArithEntail.
+   *
+   * This function returns true if n1 is modified.
+   *
+   * For example:
+   *
+   *  stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)+1 )
+   *    returns true
+   *    n1 is updated to { "bc", y }
+   *    nr is updated to { x, "a" }
+   *    curr is updated to 0   *
+   *
+   * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)-1 )
+   *    returns false
+   *
+   *  stripSymbolicLength( { y, "abc", x }, {}, 1, str.len(x)+1 )
+   *    returns false
+   *
+   *  stripSymbolicLength( { x, "abc", y }, {}, -1, 2*str.len(y)+4 )
+   *    returns true
+   *    n1 is updated to { x }
+   *    nr is updated to { "abc", y }
+   *    curr is updated to str.len(y)+1
+   */
+  static bool stripSymbolicLength(std::vector<Node>& n1,
+                                  std::vector<Node>& nr,
+                                  int dir,
+                                  Node& curr);
+  /** component contains
+   * This function is used when rewriting str.contains( t1, t2 ), where
+   * n1 is the vector form of t1
+   * n2 is the vector form of t2
+   *
+   * If this function returns n>=0 for some n, then
+   *    n1 = { x1...x{n-1} xn...x{n+s} x{n+s+1}...xm },
+   *    n2 = { y1...ys },
+   *    y1 is a suffix of xn,
+   *    y2...y{s-1} = x{n+1}...x{n+s-1}, and
+   *    ys is a prefix of x{n+s}
+   * Otherwise it returns -1.
+   *
+   * This function may update n1 if computeRemainder = true.
+   *   We maintain the invariant that the resulting value n1'
+   *   of n1 after this function is such that:
+   *     n1 = str.++( nb, n1', ne )
+   * The vectors nb and ne have the following properties.
+   * If computeRemainder = true, then
+   *   If remainderDir != -1, then
+   *     ne is { x{n+s}' x{n+s+1}...xm }
+   *     where x{n+s} = str.++( ys, x{n+s}' ).
+   *   If remainderDir != 1, then
+   *     nb is { x1, ..., x{n-1}, xn' }
+   *     where xn = str.++( xn', y1 ).
+   *
+   * For example:
+   *
+   * componentContains({ x, "abc", x }, { "b" }, {}, true, 0)
+   *   returns 1,
+   *   n1 is updated to { "b" },
+   *   nb is updated to { x, "a" },
+   *   ne is updated to { "c", x }
+   *
+   * componentContains({ x, "abc", x }, { "b" }, {}, true, 1)
+   *   returns 1,
+   *   n1 is updated to { x, "ab" },
+   *   ne is updated to { "c", x }
+   *
+   * componentContains({ y, z, "abc", x, "def" }, { "c", x, "de" }, {}, true, 1)
+   *   returns 2,
+   *   n1 is updated to { y, z, "abc", x, "de" },
+   *   ne is updated to { "f" }
+   *
+   * componentContains({ y, "abc", x, "def" }, { "c", x, "de" }, {}, true, -1)
+   *   returns 1,
+   *   n1 is updated to { "c", x, "def" },
+   *   nb is updated to { y, "ab" }
+   */
+  static int componentContains(std::vector<Node>& n1,
+                               std::vector<Node>& n2,
+                               std::vector<Node>& nb,
+                               std::vector<Node>& ne,
+                               bool computeRemainder = false,
+                               int remainderDir = 0);
+  /** strip constant endpoints
+   * This function is used when rewriting str.contains( t1, t2 ), where
+   * n1 is the vector form of t1
+   * n2 is the vector form of t2
+   *
+   * It modifies n1 to a new vector n1' such that:
+   * (1) str.contains( str.++( n1 ), str.++( n2 ) ) is equivalent to
+   * str.contains( str.++( n1' ), str.++( n2 ) )
+   * (2) str.++( n1 ) = str.++( nb, n1', ne )
+   *
+   * "dir" is the direction in which we can modify n1:
+   * if dir = 1, then we allow dropping components from the front of n1,
+   * if dir = -1, then we allow dropping components from the back of n1,
+   * if dir = 0, then we allow dropping components from either.
+   *
+   * It returns true if n1 is modified.
+   *
+   * For example:
+   * stripConstantEndpoints({ "ab", x, "de" }, { "c" }, {}, {}, 1)
+   *   returns true,
+   *   n1 is updated to { x, "de" }
+   *   nb is updated to { "ab" }
+   * stripConstantEndpoints({ "ab", x, "de" }, { "bd" }, {}, {}, 0)
+   *   returns true,
+   *   n1 is updated to { "b", x, "d" }
+   *   nb is updated to { "a" }
+   *   ne is updated to { "e" }
+   * stripConstantEndpoints({ "ad", substr("ccc",x,y) }, { "d" }, {}, {}, -1)
+   *   returns true,
+   *   n1 is updated to {"ad"}
+   *   ne is updated to { substr("ccc",x,y) }
+   */
+  static bool stripConstantEndpoints(std::vector<Node>& n1,
+                                     std::vector<Node>& n2,
+                                     std::vector<Node>& nb,
+                                     std::vector<Node>& ne,
+                                     int dir = 0);
+
+  /**
+   * Checks whether a string term `a` is entailed to contain or not contain a
+   * string term `b`.
+   *
+   * @param a The string that is checked whether it contains `b`
+   * @param b The string that is checked whether it is contained in `a`
+   * @param fullRewriter Determines whether the function can use the full
+   * rewriter or only `rewriteContains()` (useful for avoiding loops)
+   * @return true node if it can be shown that `a` contains `b`, false node if
+   * it can be shown that `a` does not contain `b`, null node otherwise
+   */
+  static Node checkContains(Node a, Node b, bool fullRewriter = true);
+
+  /** entail non-empty
+   *
+   * Checks whether string a is entailed to be non-empty. Is equivalent to
+   * the call checkArithEntail( len( a ), true ).
+   */
+  static bool checkNonEmpty(Node a);
+
+  /**
+   * Checks whether string has at most/exactly length one. Length one strings
+   * can be used for more aggressive rewriting because there is guaranteed that
+   * it cannot be overlap multiple components in a string concatenation.
+   *
+   * @param s The string to check
+   * @param strict If true, the string must have exactly length one, otherwise
+   * at most length one
+   * @return True if the string has at most/exactly length one, false otherwise
+   */
+  static bool checkLengthOne(Node s, bool strict = false);
+
+  /**
+   * Checks whether it is always true that `a` is a strict subset of `b` in the
+   * multiset domain.
+   *
+   * Examples:
+   *
+   * a = (str.++ "A" x), b = (str.++ "A" x "B") ---> true
+   * a = (str.++ "A" x), b = (str.++ "B" x "AA") ---> true
+   * a = (str.++ "A" x), b = (str.++ "B" y "AA") ---> false
+   *
+   * @param a The term for which it should be checked if it is a strict subset
+   * of `b` in the multiset domain
+   * @param b The term for which it should be checked if it is a strict
+   * superset of `a` in the multiset domain
+   * @return True if it is always the case that `a` is a strict subset of `b`,
+   * false otherwise.
+   */
+  static bool checkMultisetSubset(Node a, Node b);
+
+  /**
+   * Returns a character `c` if it is always the case that str.in.re(a, c*),
+   * i.e. if all possible values of `a` only consist of `c` characters, and the
+   * null node otherwise. If `a` is the empty string, the function returns an
+   * empty string.
+   *
+   * @param a The node to check for homogeneity
+   * @return If `a` is homogeneous, the only character that it may contain, the
+   * empty string if `a` is empty, and the null node otherwise
+   */
+  static Node checkHomogeneousString(Node a);
+
+  /**
+   * Overapproximates the possible values of node n. This overapproximation
+   * assumes that n can return a value x or the empty string and tries to find
+   * the simplest x such that this holds. In the general case, x is the same as
+   * the input n. This overapproximation can be used to sort terms with the
+   * same possible values in string concatenation for example.
+   *
+   * Example:
+   *
+   * getStringOrEmpty( (str.replace "" x y) ) --> y because (str.replace "" x y)
+   * either returns y or ""
+   *
+   * getStringOrEmpty( (str.substr "ABC" x y) ) --> (str.substr "ABC" x y)
+   * because the function could not compute a simpler
+   */
+  static Node getStringOrEmpty(Node n);
+
+  /**
+   * Infers a conjunction of equalities that correspond to (str.contains x y)
+   * if it can show that the length of y is greater or equal to the length of
+   * x. If y is a concatentation, we get x = y1 ++ ... ++ yn, the conjunction
+   * is of the form:
+   *
+   * (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
+   *
+   * where each yi'' are yi that must be empty for (= x y) to hold and yi' are
+   * yi that the function could not infer anything about.  Returns a null node
+   * if the function cannot infer that str.len(y) >= str.len(x). Returns (= x
+   * y) if the function can infer that str.len(y) >= str.len(x) but cannot
+   * infer that any of the yi must be empty.
+   */
+  static Node inferEqsFromContains(Node x, Node y);
+
+ private:
+  /** component contains base
+   *
+   * This function is a helper for the above function.
+   *
+   * It returns true if n2 is contained in n1 with the following
+   * restrictions:
+   *   If dir=1, then n2 must be a suffix of n1.
+   *   If dir=-1, then n2 must be a prefix of n1.
+   *
+   * If computeRemainder is true, then n1rb and n1re are
+   * updated such that :
+   *   n1 = str.++( n1rb, n2, n1re )
+   * where a null value of n1rb and n1re indicates the
+   * empty string.
+   *
+   * For example:
+   *
+   * componentContainsBase("cabe", "ab", n1rb, n1re, 1, false)
+   *   returns false.
+   *
+   * componentContainsBase("cabe", "ab", n1rb, n1re, 0, true)
+   *   returns true,
+   *   n1rb is set to "c",
+   *   n1re is set to "e".
+   *
+   * componentContainsBase(y, str.substr(y,0,5), n1rb, n1re, -1, true)
+   *   returns true,
+   *   n1re is set to str.substr(y,5,str.len(y)).
+   *
+   *
+   * Notice that this function may return false when it cannot compute a
+   * remainder when it otherwise would have returned true. For example:
+   *
+   * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, false)
+   *   returns true.
+   *
+   * Hence, we know that str.substr(y,x,z) is contained in y. However:
+   *
+   * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, true)
+   *   returns false.
+   *
+   * The reason is since computeRemainder=true, it must be that
+   *   y = str.++( n1rb, str.substr(y,x,z), n1re )
+   * for some n1rb, n1re. However, to construct such n1rb, n1re would require
+   * e.g. the terms:
+   *   y = str.++( ite( x+z < 0 OR x < 0, "", str.substr(y,0,x) ),
+   *               str.substr(y,x,z),
+   *               ite( x+z < 0 OR x < 0, y, str.substr(y,x+z,len(y)) ) )
+   *
+   * Since we do not wish to introduce ITE terms in the rewriter, we instead
+   * return false, indicating that we cannot compute the remainder.
+   */
+  static bool componentContainsBase(
+      Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder);
+  /**
+   * Simplifies a given node `a` s.t. the result is a concatenation of string
+   * terms that can be interpreted as a multiset and which contains all
+   * multisets that `a` could form.
+   *
+   * Examples:
+   *
+   * (str.substr "AA" 0 n) ---> "AA"
+   * (str.replace "AAA" x "BB") ---> (str.++ "AAA" "BB")
+   *
+   * @param a The node to simplify
+   * @return A concatenation that can be interpreted as a multiset
+   */
+  static Node getMultisetApproximation(Node a);
+};
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
+
+#endif /* CVC4__THEORY__STRINGS__STRING_ENTAIL_H */
diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp

index 7777b9bd7cd774ee6c91cc9062d85915e2c75637..097cef23515cd39e081c39798e4b581434d5d163 100644 (file)
--- a/src/theory/strings/theory_strings_preprocess.cpp
+++ b/src/theory/strings/theory_strings_preprocess.cpp
@@ -22,6 +22,7 @@
  #include "options/strings_options.h"
  #include "proof/proof_manager.h"
  #include "smt/logic_exception.h"
+#include "theory/strings/arith_entail.h"
  #include "theory/strings/sequences_rewriter.h"
  #include "theory/strings/word.h"
  
@@ -75,7 +76,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
      Node sk1 = n == d_zero ? emp
                             : d_sc->mkSkolemCached(
                                   s, n, SkolemCache::SK_PREFIX, "sspre");
-    Node sk2 = SequencesRewriter::checkEntailArith(t12, lt0)
+    Node sk2 = ArithEntail::check(t12, lt0)
                     ? emp
                     : d_sc->mkSkolemCached(
                           s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr");
diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp

index 3b4c757f2f76a0c2a398d57f4313f361f57ac922..b2a62cc62dfba5f2827eb922778d66307f19c99c 100644 (file)
--- a/src/theory/strings/theory_strings_utils.cpp
+++ b/src/theory/strings/theory_strings_utils.cpp
@@ -18,6 +18,8 @@
  
  #include "options/strings_options.h"
  #include "theory/rewriter.h"
+#include "theory/strings/arith_entail.h"
+#include "theory/strings/strings_entail.h"
  #include "theory/strings/word.h"
  
  using namespace CVC4::kind;
@@ -178,6 +180,84 @@ Node getConstantEndpoint(Node e, bool isSuf)
    return getConstantComponent(e);
  }
  
+Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls)
+{
+  Assert(ss.empty());
+  Assert(ls.empty());
+  while (s.getKind() == STRING_SUBSTR)
+  {
+    ss.push_back(s[1]);
+    ls.push_back(s[2]);
+    s = s[0];
+  }
+  std::reverse(ss.begin(), ss.end());
+  std::reverse(ls.begin(), ls.end());
+  return s;
+}
+
+Node mkSubstrChain(Node base,
+                   const std::vector<Node>& ss,
+                   const std::vector<Node>& ls)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  for (unsigned i = 0, size = ss.size(); i < size; i++)
+  {
+    base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
+  }
+  return base;
+}
+
+std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x)
+{
+  // Collect the equalities of the form (= x "") (sorted)
+  std::set<TNode> emptyNodes;
+  bool allEmptyEqs = true;
+  if (x.getKind() == EQUAL)
+  {
+    if (Word::isEmpty(x[0]))
+    {
+      emptyNodes.insert(x[1]);
+    }
+    else if (Word::isEmpty(x[1]))
+    {
+      emptyNodes.insert(x[0]);
+    }
+    else
+    {
+      allEmptyEqs = false;
+    }
+  }
+  else if (x.getKind() == AND)
+  {
+    for (const Node& c : x)
+    {
+      if (c.getKind() == EQUAL)
+      {
+        if (Word::isEmpty(c[0]))
+        {
+          emptyNodes.insert(c[1]);
+        }
+        else if (Word::isEmpty(c[1]))
+        {
+          emptyNodes.insert(c[0]);
+        }
+      }
+      else
+      {
+        allEmptyEqs = false;
+      }
+    }
+  }
+
+  if (emptyNodes.size() == 0)
+  {
+    allEmptyEqs = false;
+  }
+
+  return std::make_pair(
+      allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
+}
+
  bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
  {
    size_t i = start;
@@ -270,6 +350,15 @@ bool isStringKind(Kind k)
           || k == STRING_FROM_CODE || k == STRING_TO_CODE;
  }
  
+bool isRegExpKind(Kind k)
+{
+  return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
+         || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
+         || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
+         || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
+         || k == REGEXP_COMPLEMENT;
+}
+
  TypeNode getOwnerStringType(Node n)
  {
    TypeNode tn;
diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h

index 846d3d563f1130c7bb818ffe0ba6824f14cec8c6..9fc23499a7ce58310d5720c3ae6f24ceed8519d1 100644 (file)
--- a/src/theory/strings/theory_strings_utils.h
+++ b/src/theory/strings/theory_strings_utils.h
@@ -102,6 +102,41 @@ Node getConstantComponent(Node t);
   */
  Node getConstantEndpoint(Node e, bool isSuf);
  
+/** decompose substr chain
+ *
+ * If s is substr( ... substr( base, x1, y1 ) ..., xn, yn ), then this
+ * function returns base, adds { x1 ... xn } to ss, and { y1 ... yn } to ls.
+ */
+Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls);
+/** make substr chain
+ *
+ * If ss is { x1 ... xn } and ls is { y1 ... yn }, this returns the term
+ * substr( ... substr( base, x1, y1 ) ..., xn, yn ).
+ */
+Node mkSubstrChain(Node base,
+                   const std::vector<Node>& ss,
+                   const std::vector<Node>& ls);
+
+/**
+ * Collects equal-to-empty nodes from a conjunction or a single
+ * node. Returns a list of nodes that are compared to empty nodes
+ * and a boolean that indicates whether all nodes in the
+ * conjunction were a comparison with the empty node. The nodes in
+ * the list are sorted and duplicates removed.
+ *
+ * Examples:
+ *
+ * collectEmptyEqs( (= "" x) ) = { true, [x] }
+ * collectEmptyEqs( (and (= "" x) (= "" y)) ) = { true, [x, y] }
+ * collectEmptyEqs( (and (= "A" x) (= "" y) (= "" y)) ) = { false, [y] }
+ *
+ * @param x The conjunction of equalities or a single equality
+ * @return A pair of a boolean that indicates whether the
+ * conjunction consists only of comparisons to the empty string
+ * and the list of nodes that are compared to the empty string
+ */
+std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x);
+
  /**
   * Given a vector of regular expression nodes and a start index that points to
   * a wildcard, returns true if the wildcard is unbounded (i.e. it is followed
@@ -142,6 +177,8 @@ void printConcatTrace(std::vector<Node>& n, const char* c);
  
  /** Is k a string-specific kind? */
  bool isStringKind(Kind k);
+/** is k a native operator whose return type is a regular expression? */
+bool isRegExpKind(Kind k);
  
  /** Get owner string type
   *
diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp

index b42cf3160e0ab060c18883e1805c36aefbc8a6ac..085078dea0d7cfd78267008e80acbace57db9f65 100644 (file)
--- a/src/theory/strings/word.cpp
+++ b/src/theory/strings/word.cpp
@@ -99,7 +99,7 @@ std::vector<Node> Word::getChars(TNode x)
    return ret;
  }
  
-bool Word::isEmpty(TNode x) { return getLength(x) == 0; }
+bool Word::isEmpty(TNode x) { return x.isConst() && getLength(x) == 0; }
  
  bool Word::strncmp(TNode x, TNode y, std::size_t n)
  {
@@ -283,6 +283,31 @@ std::size_t Word::roverlap(TNode x, TNode y)
    return 0;
  }
  
+Node Word::splitConstant(Node x, Node y, size_t& index, bool isRev)
+{
+  Assert(x.isConst() && y.isConst());
+  size_t lenA = getLength(x);
+  size_t lenB = getLength(y);
+  index = lenA <= lenB ? 1 : 0;
+  size_t lenShort = index == 1 ? lenA : lenB;
+  bool cmp = isRev ? rstrncmp(x, y, lenShort) : strncmp(x, y, lenShort);
+  if (cmp)
+  {
+    Node l = index == 0 ? x : y;
+    if (isRev)
+    {
+      size_t new_len = getLength(l) - lenShort;
+      return substr(l, 0, new_len);
+    }
+    else
+    {
+      return substr(l, lenShort);
+    }
+  }
+  // not the same prefix/suffix
+  return Node::null();
+}
+
  }  // namespace strings
  }  // namespace theory
  }  // namespace CVC4
diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h

index 8e6e7876e6cb745b9eb8b23fd688859afc1d1b7f..454710c98cf020320ed23bd794d7497e978b5b2d 100644 (file)
--- a/src/theory/strings/word.h
+++ b/src/theory/strings/word.h
@@ -139,6 +139,19 @@ class Word
     * Notice that x.overlap(y) = y.roverlap(x)
     */
    static std::size_t roverlap(TNode x, TNode y);
+  /** Split constant
+   *
+   * This returns the suffix remainder (resp. prefix remainder when isRev is
+   * true) of words a and b, call it r, such that:
+   * (1) a = b ++ r , or
+   * (2) a ++ r = b
+   * when isRev = false.  The argument index is set to 1 if we are in the second
+   * case, and 0 otherwise.
+   *
+   * If a and b do not share a common prefix (resp. suffix), then this method
+   * returns the null node.
+   */
+  static Node splitConstant(Node a, Node b, size_t& index, bool isRev);
  };
  
  // ------------------------------ end for words (string or sequence constants)
diff --git a/test/unit/theory/sequences_rewriter_white.h b/test/unit/theory/sequences_rewriter_white.h

index c823c07049d35116ee01d1d525a587092902b78b..4cc679ca8a813858ac52603a3c0f4d0190e1445b 100644 (file)
--- a/test/unit/theory/sequences_rewriter_white.h
+++ b/test/unit/theory/sequences_rewriter_white.h
@@ -20,7 +20,9 @@
  #include "smt/smt_engine_scope.h"
  #include "theory/quantifiers/extended_rewrite.h"
  #include "theory/rewriter.h"
+#include "theory/strings/arith_entail.h"
  #include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/strings_entail.h"
  
  #include <cxxtest/TestSuite.h>
  #include <iostream>
@@ -108,23 +110,23 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
      Node three = d_nm->mkConst(Rational(3));
      Node i = d_nm->mkVar("i", intType);
  
-    TS_ASSERT(SequencesRewriter::checkEntailLengthOne(a));
-    TS_ASSERT(SequencesRewriter::checkEntailLengthOne(a, true));
+    TS_ASSERT(StringsEntail::checkLengthOne(a));
+    TS_ASSERT(StringsEntail::checkLengthOne(a, true));
  
      Node substr = d_nm->mkNode(kind::STRING_SUBSTR, x, zero, one);
-    TS_ASSERT(SequencesRewriter::checkEntailLengthOne(substr));
-    TS_ASSERT(!SequencesRewriter::checkEntailLengthOne(substr, true));
+    TS_ASSERT(StringsEntail::checkLengthOne(substr));
+    TS_ASSERT(!StringsEntail::checkLengthOne(substr, true));
  
      substr = d_nm->mkNode(kind::STRING_SUBSTR,
                            d_nm->mkNode(kind::STRING_CONCAT, a, x),
                            zero,
                            one);
-    TS_ASSERT(SequencesRewriter::checkEntailLengthOne(substr));
-    TS_ASSERT(SequencesRewriter::checkEntailLengthOne(substr, true));
+    TS_ASSERT(StringsEntail::checkLengthOne(substr));
+    TS_ASSERT(StringsEntail::checkLengthOne(substr, true));
  
      substr = d_nm->mkNode(kind::STRING_SUBSTR, x, zero, two);
-    TS_ASSERT(!SequencesRewriter::checkEntailLengthOne(substr));
-    TS_ASSERT(!SequencesRewriter::checkEntailLengthOne(substr, true));
+    TS_ASSERT(!StringsEntail::checkLengthOne(substr));
+    TS_ASSERT(!StringsEntail::checkLengthOne(substr, true));
    }
  
    void testCheckEntailArith()
@@ -139,10 +141,10 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
      // 1 >= (str.len (str.substr z n 1)) ---> true
      Node substr_z = d_nm->mkNode(kind::STRING_LENGTH,
                                   d_nm->mkNode(kind::STRING_SUBSTR, z, n, one));
-    TS_ASSERT(SequencesRewriter::checkEntailArith(one, substr_z));
+    TS_ASSERT(ArithEntail::check(one, substr_z));
  
      // (str.len (str.substr z n 1)) >= 1 ---> false
-    TS_ASSERT(!SequencesRewriter::checkEntailArith(substr_z, one));
+    TS_ASSERT(!ArithEntail::check(substr_z, one));
    }
  
    void testCheckEntailArithWithAssumption()
@@ -166,25 +168,25 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
          Rewriter::rewrite(d_nm->mkNode(kind::EQUAL, x_plus_slen_y, zero));
  
      // x + (str.len y) = 0 |= 0 >= x --> true
-    TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption(
+    TS_ASSERT(ArithEntail::checkWithAssumption(
          x_plus_slen_y_eq_zero, zero, x, false));
  
      // x + (str.len y) = 0 |= 0 > x --> false
-    TS_ASSERT(!SequencesRewriter::checkEntailArithWithAssumption(
+    TS_ASSERT(!ArithEntail::checkWithAssumption(
          x_plus_slen_y_eq_zero, zero, x, true));
  
      Node x_plus_slen_y_plus_z_eq_zero = Rewriter::rewrite(d_nm->mkNode(
          kind::EQUAL, d_nm->mkNode(kind::PLUS, x_plus_slen_y, z), zero));
  
      // x + (str.len y) + z = 0 |= 0 > x --> false
-    TS_ASSERT(!SequencesRewriter::checkEntailArithWithAssumption(
+    TS_ASSERT(!ArithEntail::checkWithAssumption(
          x_plus_slen_y_plus_z_eq_zero, zero, x, true));
  
      Node x_plus_slen_y_plus_slen_y_eq_zero = Rewriter::rewrite(d_nm->mkNode(
          kind::EQUAL, d_nm->mkNode(kind::PLUS, x_plus_slen_y, slen_y), zero));
  
      // x + (str.len y) + (str.len y) = 0 |= 0 >= x --> true
-    TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption(
+    TS_ASSERT(ArithEntail::checkWithAssumption(
          x_plus_slen_y_plus_slen_y_eq_zero, zero, x, false));
  
      Node five = d_nm->mkConst(Rational(5));
@@ -194,28 +196,28 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
          Rewriter::rewrite(d_nm->mkNode(kind::LT, x_plus_five, six));
  
      // x + 5 < 6 |= 0 >= x --> true
-    TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption(
-        x_plus_five_lt_six, zero, x, false));
+    TS_ASSERT(
+        ArithEntail::checkWithAssumption(x_plus_five_lt_six, zero, x, false));
  
      // x + 5 < 6 |= 0 > x --> false
-    TS_ASSERT(!SequencesRewriter::checkEntailArithWithAssumption(
-        x_plus_five_lt_six, zero, x, true));
+    TS_ASSERT(
+        !ArithEntail::checkWithAssumption(x_plus_five_lt_six, zero, x, true));
  
      Node neg_x = d_nm->mkNode(kind::UMINUS, x);
      Node x_plus_five_lt_five =
          Rewriter::rewrite(d_nm->mkNode(kind::LT, x_plus_five, five));
  
      // x + 5 < 5 |= -x >= 0 --> true
-    TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption(
+    TS_ASSERT(ArithEntail::checkWithAssumption(
          x_plus_five_lt_five, neg_x, zero, false));
  
      // x + 5 < 5 |= 0 > x --> true
-    TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption(
-        x_plus_five_lt_five, zero, x, false));
+    TS_ASSERT(
+        ArithEntail::checkWithAssumption(x_plus_five_lt_five, zero, x, false));
  
      // 0 < x |= x >= (str.len (int.to.str x))
      Node assm = Rewriter::rewrite(d_nm->mkNode(kind::LT, zero, x));
-    TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption(
+    TS_ASSERT(ArithEntail::checkWithAssumption(
          assm,
          x,
          d_nm->mkNode(kind::STRING_LENGTH, d_nm->mkNode(kind::STRING_ITOS, x)),
@@ -1050,33 +1052,30 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
      Node empty_x_y = d_nm->mkNode(kind::AND,
                                    d_nm->mkNode(kind::EQUAL, empty, x),
                                    d_nm->mkNode(kind::EQUAL, empty, y));
-    sameNormalForm(SequencesRewriter::inferEqsFromContains(empty, xy),
-                   empty_x_y);
+    sameNormalForm(StringsEntail::inferEqsFromContains(empty, xy), empty_x_y);
  
      // inferEqsFromContains(x, (str.++ x y)) returns false
      Node bxya = d_nm->mkNode(kind::STRING_CONCAT, b, y, x, a);
-    sameNormalForm(SequencesRewriter::inferEqsFromContains(x, bxya), f);
+    sameNormalForm(StringsEntail::inferEqsFromContains(x, bxya), f);
  
      // inferEqsFromContains(x, y) returns null
-    Node n = SequencesRewriter::inferEqsFromContains(x, y);
+    Node n = StringsEntail::inferEqsFromContains(x, y);
      TS_ASSERT(n.isNull());
  
      // inferEqsFromContains(x, x) returns something equivalent to (= x x)
      Node eq_x_x = d_nm->mkNode(kind::EQUAL, x, x);
-    sameNormalForm(SequencesRewriter::inferEqsFromContains(x, x), eq_x_x);
+    sameNormalForm(StringsEntail::inferEqsFromContains(x, x), eq_x_x);
  
      // inferEqsFromContains((str.replace x "B" "A"), x) returns something
      // equivalent to (= (str.replace x "B" "A") x)
      Node repl = d_nm->mkNode(kind::STRING_STRREPL, x, b, a);
      Node eq_repl_x = d_nm->mkNode(kind::EQUAL, repl, x);
-    sameNormalForm(SequencesRewriter::inferEqsFromContains(repl, x),
-                   eq_repl_x);
+    sameNormalForm(StringsEntail::inferEqsFromContains(repl, x), eq_repl_x);
  
      // inferEqsFromContains(x, (str.replace x "B" "A")) returns something
      // equivalent to (= (str.replace x "B" "A") x)
      Node eq_x_repl = d_nm->mkNode(kind::EQUAL, x, repl);
-    sameNormalForm(SequencesRewriter::inferEqsFromContains(x, repl),
-                   eq_x_repl);
+    sameNormalForm(StringsEntail::inferEqsFromContains(x, repl), eq_x_repl);
    }
  
    void testRewritePrefixSuffix()
@@ -1402,8 +1401,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
        std::vector<Node> n2 = {a};
        std::vector<Node> nb;
        std::vector<Node> ne;
-      bool res =
-          SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 0);
+      bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, 0);
        TS_ASSERT(!res);
      }
  
@@ -1414,8 +1412,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
        std::vector<Node> n2 = {a, d_nm->mkNode(kind::STRING_ITOS, n)};
        std::vector<Node> nb;
        std::vector<Node> ne;
-      bool res =
-          SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 0);
+      bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, 0);
        TS_ASSERT(!res);
      }
  
@@ -1430,8 +1427,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
        std::vector<Node> ne;
        std::vector<Node> n1r = {cd};
        std::vector<Node> nbr = {ab};
-      bool res =
-          SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 1);
+      bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, 1);
        TS_ASSERT(res);
        TS_ASSERT_EQUALS(n1, n1r);
        TS_ASSERT_EQUALS(nb, nbr);
@@ -1448,8 +1444,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
        std::vector<Node> ne;
        std::vector<Node> n1r = {c, x};
        std::vector<Node> nbr = {ab};
-      bool res =
-          SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 1);
+      bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, 1);
        TS_ASSERT(res);
        TS_ASSERT_EQUALS(n1, n1r);
        TS_ASSERT_EQUALS(nb, nbr);
@@ -1466,8 +1461,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
        std::vector<Node> ne;
        std::vector<Node> n1r = {a};
        std::vector<Node> ner = {bc};
-      bool res =
-          SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, -1);
+      bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, -1);
        TS_ASSERT(res);
        TS_ASSERT_EQUALS(n1, n1r);
        TS_ASSERT_EQUALS(ne, ner);
@@ -1484,8 +1478,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
        std::vector<Node> ne;
        std::vector<Node> n1r = {x, a};
        std::vector<Node> ner = {bc};
-      bool res =
-          SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, -1);
+      bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, -1);
        TS_ASSERT(res);
        TS_ASSERT_EQUALS(n1, n1r);
        TS_ASSERT_EQUALS(ne, ner);
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Fri, 3 Apr 2020 14:43:12 +0000 (09:43 -0500)
committer	GitHub <noreply@github.com>
	Fri, 3 Apr 2020 14:43:12 +0000 (09:43 -0500)
src/CMakeLists.txt		patch \| blob \| history
src/theory/strings/arith_entail.cpp	[new file with mode: 0644]	patch \| blob
src/theory/strings/arith_entail.h	[new file with mode: 0644]	patch \| blob
src/theory/strings/core_solver.cpp		patch \| blob \| history
src/theory/strings/regexp_elim.cpp		patch \| blob \| history
src/theory/strings/regexp_entail.cpp	[new file with mode: 0644]	patch \| blob
src/theory/strings/regexp_entail.h	[new file with mode: 0644]	patch \| blob
src/theory/strings/regexp_operation.cpp		patch \| blob \| history
src/theory/strings/regexp_operation.h		patch \| blob \| history
src/theory/strings/regexp_solver.cpp		patch \| blob \| history
src/theory/strings/sequences_rewriter.cpp		patch \| blob \| history
src/theory/strings/sequences_rewriter.h		patch \| blob \| history
src/theory/strings/skolem_cache.cpp		patch \| blob \| history
src/theory/strings/strings_entail.cpp	[new file with mode: 0644]	patch \| blob
src/theory/strings/strings_entail.h	[new file with mode: 0644]	patch \| blob
src/theory/strings/theory_strings_preprocess.cpp		patch \| blob \| history
src/theory/strings/theory_strings_utils.cpp		patch \| blob \| history
src/theory/strings/theory_strings_utils.h		patch \| blob \| history
src/theory/strings/word.cpp		patch \| blob \| history
src/theory/strings/word.h		patch \| blob \| history
test/unit/theory/sequences_rewriter_white.h		patch \| blob \| history