Enable model-based reduction technique for strings (#7680)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Tue, 23 Nov 2021 22:47:09 +0000 (16:47 -0600)

committer GitHub <noreply@github.com>

Tue, 23 Nov 2021 22:47:09 +0000 (22:47 +0000)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Tue, 23 Nov 2021 22:47:09 +0000 (16:47 -0600)
committer GitHub <noreply@github.com>
Tue, 23 Nov 2021 22:47:09 +0000 (22:47 +0000)
diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml

index c32c3adbb8fa3208581a4bb9ffca1bc065e58e3a..caebcec3afe92980d37d68c76f1fc640c4106526 100644 (file)
--- a/src/options/strings_options.toml
+++ b/src/options/strings_options.toml
@@ -115,12 +115,12 @@ name   = "Strings Theory"
    help       = "minimize explanations for prefix of normal forms in strings"
  
  [[option]]
-  name       = "stringGuessModel"
+  name       = "stringModelBasedReduction"
    category   = "regular"
-  long       = "strings-guess-model"
+  long       = "strings-mbr"
    type       = "bool"
-  default    = "false"
-  help       = "use model guessing to avoid string extended function reductions"
+  default    = "true"
+  help       = "use models to avoid reductions for extended functions that introduce quantified formulas"
  
  [[option]]
    name       = "regExpElim"
diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp

index cbd67f232de6923cdc433d473a4e0d7861bd104d..52b0a6907f095ea9c02d2eb7bcb6fa9da3f4100b 100644 (file)
--- a/src/theory/strings/extf_solver.cpp
+++ b/src/theory/strings/extf_solver.cpp
@@ -80,11 +80,12 @@ ExtfSolver::~ExtfSolver() {}
  
  bool ExtfSolver::doReduction(int effort, Node n)
  {
-  Assert(d_extfInfoTmp.find(n) != d_extfInfoTmp.end());
-  if (!d_extfInfoTmp[n].d_modelActive)
+  Trace("strings-extf-debug")
+      << "doReduction " << n << ", effort " << effort << std::endl;
+  if (!isActiveInModel(n))
    {
      // n is not active in the model, no need to reduce
-  Trace("strings-extf-debug") << "...skip due to model active" << std::endl;
+    Trace("strings-extf-debug") << "...skip due to model active" << std::endl;
      return false;
    }
    if (d_reduced.find(n)!=d_reduced.end())
@@ -93,9 +94,6 @@ bool ExtfSolver::doReduction(int effort, Node n)
      Trace("strings-extf-debug") << "...skip due to reduced" << std::endl;
      return false;
    }
-  // determine the effort level to process the extf at
-  // 0 - at assertion time, 1+ - after no other reduction is applicable
-  int r_effort = -1;
    // polarity : 1 true, -1 false, 0 neither
    int pol = 0;
    Kind k = n.getKind();
@@ -103,67 +101,65 @@ bool ExtfSolver::doReduction(int effort, Node n)
    {
      pol = d_extfInfoTmp[n].d_const.getConst<bool>() ? 1 : -1;
    }
-  if (k == STRING_CONTAINS)
+  // determine if it is the right effort
+  if (k == STRING_SUBSTR || (k == STRING_CONTAINS && pol == 1))
    {
-    if (pol == 1)
+    // we reduce these semi-eagerly, at effort 1
+    if (effort != 1)
      {
-      r_effort = 1;
+      return false;
      }
-    else if (pol == -1)
+  }
+  else if (k == STRING_CONTAINS && pol == -1)
+  {
+    // negative contains reduces at level 2, or 3 if guessing model
+    int reffort = options().strings.stringModelBasedReduction ? 3 : 2;
+    if (effort == reffort)
      {
-      if (effort == 2)
+      Node x = n[0];
+      Node s = n[1];
+      std::vector<Node> lexp;
+      Node lenx = d_state.getLength(x, lexp);
+      Node lens = d_state.getLength(s, lexp);
+      if (d_state.areEqual(lenx, lens))
        {
-        Node x = n[0];
-        Node s = n[1];
-        std::vector<Node> lexp;
-        Node lenx = d_state.getLength(x, lexp);
-        Node lens = d_state.getLength(s, lexp);
-        if (d_state.areEqual(lenx, lens))
-        {
-          Trace("strings-extf-debug")
-              << "  resolve extf : " << n
-              << " based on equal lengths disequality." << std::endl;
-          // We can reduce negative contains to a disequality when lengths are
-          // equal. In other words, len( x ) = len( s ) implies
-          //   ~contains( x, s ) reduces to x != s.
-          if (!d_state.areDisequal(x, s))
-          {
-            // len( x ) = len( s ) ^ ~contains( x, s ) => x != s
-            lexp.push_back(lenx.eqNode(lens));
-            lexp.push_back(n.negate());
-            Node xneqs = x.eqNode(s).negate();
-            d_im.sendInference(
-                lexp, xneqs, InferenceId::STRINGS_CTN_NEG_EQUAL, false, true);
-          }
-          // this depends on the current assertions, so this
-          // inference is context-dependent
-          d_extt.markReduced(n, ExtReducedId::STRINGS_NEG_CTN_DEQ, true);
-          return true;
-        }
-        else
+        Trace("strings-extf-debug")
+            << "  resolve extf : " << n
+            << " based on equal lengths disequality." << std::endl;
+        // We can reduce negative contains to a disequality when lengths are
+        // equal. In other words, len( x ) = len( s ) implies
+        //   ~contains( x, s ) reduces to x != s.
+        if (!d_state.areDisequal(x, s))
          {
-          r_effort = 2;
+          // len( x ) = len( s ) ^ ~contains( x, s ) => x != s
+          lexp.push_back(lenx.eqNode(lens));
+          lexp.push_back(n.negate());
+          Node xneqs = x.eqNode(s).negate();
+          d_im.sendInference(
+              lexp, xneqs, InferenceId::STRINGS_CTN_NEG_EQUAL, false, true);
          }
+        // this depends on the current assertions, so this
+        // inference is context-dependent
+        d_extt.markReduced(n, ExtReducedId::STRINGS_NEG_CTN_DEQ, true);
+        return true;
        }
      }
+    else
+    {
+      return false;
+    }
    }
-  else if (k == STRING_SUBSTR)
-  {
-    r_effort = 1;
-  }
-  else if (k == SEQ_UNIT)
+  else if (k == SEQ_UNIT || k == STRING_IN_REGEXP || k == STRING_TO_CODE
+           || (k == STRING_CONTAINS && pol == 0))
    {
-    // never necessary to reduce seq.unit
+    // never necessary to reduce seq.unit. str.to_code or str.in_re here.
+    // also, we do not reduce str.contains that are preregistered but not
+    // asserted (pol=0).
      return false;
    }
-  else if (k != STRING_IN_REGEXP)
-  {
-    r_effort = 2;
-  }
-  if (effort != r_effort)
+  else if (effort != 2)
    {
-    Trace("strings-extf-debug") << "...skip due to effort" << std::endl;
-    // not the right effort level to reduce
+    // all other operators reduce at level 2
      return false;
    }
    Node c_n = pol == -1 ? n.negate() : n;
@@ -190,7 +186,7 @@ bool ExtfSolver::doReduction(int effort, Node n)
      // context-dependent because it depends on the polarity of n itself
      d_extt.markReduced(n, ExtReducedId::STRINGS_POS_CTN, true);
    }
-  else if (k != kind::STRING_TO_CODE)
+  else
    {
      NodeManager* nm = NodeManager::currentNM();
      Assert(k == STRING_SUBSTR || k == STRING_UPDATE || k == STRING_CONTAINS
@@ -198,7 +194,8 @@ bool ExtfSolver::doReduction(int effort, Node n)
             || k == STRING_STOI || k == STRING_REPLACE || k == STRING_REPLACE_ALL
             || k == SEQ_NTH || k == STRING_REPLACE_RE
             || k == STRING_REPLACE_RE_ALL || k == STRING_LEQ
-           || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV);
+           || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV)
+        << "Unknown reduction: " << k;
      std::vector<Node> new_nodes;
      Node res = d_preproc.simplify(n, new_nodes);
      Assert(res != n);
@@ -302,6 +299,9 @@ void ExtfSolver::checkExtfEval(int effort)
        // if rewrites to a constant, then do the inference and mark as reduced
        if (nrc.isConst())
        {
+        // at effort=3, our substitution is from the model, and we don't do
+        // inferences based on the model, instead we check whether the
+        // cosntraint is already equal to its expected value below.
          if (effort < 3)
          {
            d_extt.markReduced(n, ExtReducedId::STRINGS_SR_CONST);
@@ -412,15 +412,15 @@ void ExtfSolver::checkExtfEval(int effort)
          }
          reduced = true;
        }
-      else
+      else if (effort < 3)
        {
          // if this was a predicate which changed after substitution + rewriting
+        // We only do this before models are constructed (effort<3)
          if (!einfo.d_const.isNull() && nrc.getType().isBoolean() && nrc != n)
          {
            bool pol = einfo.d_const == d_true;
            Node nrcAssert = pol ? nrc : nrc.negate();
            Node nAssert = pol ? n : n.negate();
-          Assert(effort < 3);
            einfo.d_exp.push_back(nAssert);
            Trace("strings-extf-debug") << "  decomposable..." << std::endl;
            Trace("strings-extf") << "  resolve extf : " << sn << " -> " << nrc
@@ -446,15 +446,18 @@ void ExtfSolver::checkExtfEval(int effort)
          && inferProcessed.find(n) == inferProcessed.end())
      {
        inferProcessed.insert(n);
-      Assert(effort < 3);
        if (effort == 1)
        {
          Trace("strings-extf")
              << "  cannot rewrite extf : " << to_reduce << std::endl;
        }
        // we take to_reduce to be the (partially) reduced version of n, which
-      // is justified by the explanation in einfo.
-      checkExtfInference(n, to_reduce, einfo, effort);
+      // is justified by the explanation in einfo. We only do this if we are
+      // not based on the model (effort<3).
+      if (effort < 3)
+      {
+        checkExtfInference(n, to_reduce, einfo, effort);
+      }
        if (Trace.isOn("strings-extf-list"))
        {
          Trace("strings-extf-list") << "  * " << to_reduce;
@@ -719,6 +722,17 @@ std::vector<Node> ExtfSolver::getActive(Kind k) const
    return d_extt.getActive(k);
  }
  
+bool ExtfSolver::isActiveInModel(Node n) const
+{
+  std::map<Node, ExtfInfoTmp>::const_iterator it = d_extfInfoTmp.find(n);
+  if (it == d_extfInfoTmp.end())
+  {
+    Assert(false) << "isActiveInModel: Expected extf info for " << n;
+    return true;
+  }
+  return it->second.d_modelActive;
+}
+
  bool StringsExtfCallback::getCurrentSubstitution(
      int effort,
      const std::vector<Node>& vars,
diff --git a/src/theory/strings/extf_solver.h b/src/theory/strings/extf_solver.h

index 2a9ca2d45f379c0d7fdc8d7af7b759668058e513..d8837f2edb5be0ad3f0e7aed49cfa524cfaef08b 100644 (file)
--- a/src/theory/strings/extf_solver.h
+++ b/src/theory/strings/extf_solver.h
@@ -155,6 +155,12 @@ class ExtfSolver : protected EnvObj
     * context (see ExtTheory::getActive).
     */
    std::vector<Node> getActive(Kind k) const;
+  /**
+   * Return true if n is active in the model. If this method returns false,
+   * then n is already satisfied in the model (its value in the model is
+   * the same as its representative in the equality engine).
+   */
+  bool isActiveInModel(Node n) const;
    //---------------------------------- end information about ExtTheory
    /**
     * Print the relevant information regarding why we have a model, return as a
diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp

index 04de0ea448f91c6ae72ec084c74457b96941decd..24ce64842154fd9fe886a5adb005c5c75f284a10 100644 (file)
--- a/src/theory/strings/regexp_solver.cpp
+++ b/src/theory/strings/regexp_solver.cpp
@@ -62,8 +62,10 @@ Node RegExpSolver::mkAnd(Node c1, Node c2)
    return NodeManager::currentNM()->mkNode(AND, c1, c2);
  }
  
-void RegExpSolver::checkMemberships()
+void RegExpSolver::checkMemberships(int effort)
  {
+  Trace("regexp-process") << "Checking Memberships, effort = " << effort
+                          << " ... " << std::endl;
    // add the memberships
    std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP);
    // maps representatives to regular expression memberships in that class
@@ -90,236 +92,260 @@ void RegExpSolver::checkMemberships()
            << "  irrelevant (non-asserted) membership : " << n << std::endl;
      }
    }
-  check(assertedMems);
+  if (effort == 0)
+  {
+    // First check for conflict. We do this only if effort is 0, otherwise
+    // we have already run these checks in this SAT context.
+    if (checkInclInter(assertedMems))
+    {
+      return;
+    }
+  }
+  checkUnfold(assertedMems, effort);
  }
  
-void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
+bool RegExpSolver::checkInclInter(
+    const std::map<Node, std::vector<Node> >& mems)
  {
-  bool addedLemma = false;
-  bool changed = false;
-  std::vector<Node> processed;
-
-  Trace("regexp-process") << "Checking Memberships ... " << std::endl;
+  Trace("regexp-process") << "Checking inclusion/intersection ... "
+                          << std::endl;
    for (const std::pair<const Node, std::vector<Node> >& mr : mems)
    {
+    // copy the vector because it is modified in the call below
      std::vector<Node> mems2 = mr.second;
      Trace("regexp-process")
          << "Memberships(" << mr.first << ") = " << mr.second << std::endl;
      if (!checkEqcInclusion(mems2))
      {
        // conflict discovered, return
-      return;
+      return true;
      }
      if (!checkEqcIntersect(mems2))
      {
        // conflict discovered, return
-      return;
+      return true;
      }
    }
+  Trace("regexp-debug") << "... No Intersect Conflict in Memberships"
+                        << std::endl;
+  return false;
+}
  
-  Trace("regexp-debug")
-      << "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
-      << std::endl;
-  if (!addedLemma)
+void RegExpSolver::checkUnfold(const std::map<Node, std::vector<Node> >& mems,
+                               int effort)
+{
+  Trace("regexp-process") << "Checking unfold ... " << std::endl;
+  bool addedLemma = false;
+  std::vector<Node> processed;
+
+  // get all memberships
+  std::map<Node, Node> allMems;
+  for (const std::pair<const Node, std::vector<Node> >& mr : mems)
    {
-    // get all memberships
-    std::map<Node, Node> allMems;
-    for (const std::pair<const Node, std::vector<Node> >& mr : mems)
+    for (const Node& m : mr.second)
      {
-      for (const Node& m : mr.second)
-      {
-        allMems[m] = mr.first;
-      }
+      allMems[m] = mr.first;
      }
+  }
  
-    NodeManager* nm = NodeManager::currentNM();
-    // representatives of strings that are the LHS of positive memberships that
-    // we unfolded
-    std::unordered_set<Node> repUnfold;
-    // check positive (e=0), then negative (e=1) memberships
-    for (unsigned e = 0; e < 2; e++)
+  NodeManager* nm = NodeManager::currentNM();
+  // representatives of strings that are the LHS of positive memberships that
+  // we unfolded
+  std::unordered_set<Node> repUnfold;
+  // Check positive (e=0), then negative (e=1) memberships. If we are doing
+  // model-based reductions, we process positive ones at effort=0, and negative
+  // ones at effort=3.
+  bool mbr = options().strings.stringModelBasedReduction;
+  size_t startE = mbr ? (effort > 0 ? 1 : 0) : 0;
+  size_t endE = mbr ? (effort > 0 ? 2 : 1) : 2;
+  for (size_t e = startE; e < endE; e++)
+  {
+    for (const std::pair<const Node, Node>& mp : allMems)
      {
-      for (const std::pair<const Node, Node>& mp : allMems)
+      Node assertion = mp.first;
+      Node rep = mp.second;
+      bool polarity = assertion.getKind() != NOT;
+      if (polarity != (e == 0))
+      {
+        continue;
+      }
+      // check regular expression membership
+      Trace("regexp-debug")
+          << "Check : " << assertion << " "
+          << (d_regexp_ucached.find(assertion) == d_regexp_ucached.end()) << " "
+          << (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
+          << std::endl;
+      if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
+          || d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
+      {
+        continue;
+      }
+      Trace("strings-regexp")
+          << "We have regular expression assertion : " << assertion
+          << std::endl;
+      Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
+      Assert(atom == rewrite(atom));
+      if (effort > 0 && !d_esolver.isActiveInModel(atom))
        {
-        Node assertion = mp.first;
-        Node rep = mp.second;
-        // check regular expression membership
-        Trace("regexp-debug")
-            << "Check : " << assertion << " "
-            << (d_regexp_ucached.find(assertion) == d_regexp_ucached.end())
-            << " "
-            << (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
-            << std::endl;
-        if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
-            || d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
-        {
-          continue;
-        }
          Trace("strings-regexp")
-            << "We have regular expression assertion : " << assertion
-            << std::endl;
-        Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
-        Assert(atom == rewrite(atom));
-        bool polarity = assertion.getKind() != NOT;
-        if (polarity != (e == 0))
-        {
-          continue;
-        }
-        bool flag = true;
-        Node x = atom[0];
-        Node r = atom[1];
-        Assert(rep == d_state.getRepresentative(x));
-        // The following code takes normal forms into account for the purposes
-        // of simplifying a regular expression membership x in R. For example,
-        // if x = "A" in the current context, then we may be interested in
-        // reasoning about ( x in R ) * { x -> "A" }. Say we update the
-        // membership to nx in R', then:
-        // - nfexp => ( x in R ) <=> nx in R'
-        // - rnfexp => R = R'
-        // We use these explanations below as assumptions on inferences when
-        // appropriate. Notice that for inferring conflicts and tautologies,
-        // we use the normal form of x always. This is because we always want to
-        // discover conflicts/tautologies whenever possible.
-        // For inferences based on regular expression unfolding, we do not use
-        // the normal form of x. The reason is that it is better to unfold
-        // regular expression memberships in a context-indepedent manner,
-        // that is, not taking into account the current normal form of x, since
-        // this ensures these lemmas are still relevant after backtracking.
-        std::vector<Node> nfexp;
-        std::vector<Node> rnfexp;
-        // The normal form of x is stored in nx, while x is left unchanged.
-        Node nx = x;
-        if (!x.isConst())
-        {
-          nx = d_csolver.getNormalString(x, nfexp);
-        }
-        // If r is not a constant regular expression, we update it based on
-        // normal forms, which may concretize its variables.
-        if (!d_regexp_opr.checkConstRegExp(r))
-        {
-          r = getNormalSymRegExp(r, rnfexp);
-          nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
-          changed = true;
-        }
-        Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
-                                   << nx << " IN " << r << std::endl;
-        if (nx != x || changed)
+            << "...ignore since inactive in model" << std::endl;
+        continue;
+      }
+      Node x = atom[0];
+      Node r = atom[1];
+      Assert(rep == d_state.getRepresentative(x));
+      // The following code takes normal forms into account for the purposes
+      // of simplifying a regular expression membership x in R. For example,
+      // if x = "A" in the current context, then we may be interested in
+      // reasoning about ( x in R ) * { x -> "A" }. Say we update the
+      // membership to nx in R', then:
+      // - nfexp => ( x in R ) <=> nx in R'
+      // - rnfexp => R = R'
+      // We use these explanations below as assumptions on inferences when
+      // appropriate. Notice that for inferring conflicts and tautologies,
+      // we use the normal form of x always. This is because we always want to
+      // discover conflicts/tautologies whenever possible.
+      // For inferences based on regular expression unfolding, we do not use
+      // the normal form of x. The reason is that it is better to unfold
+      // regular expression memberships in a context-indepedent manner,
+      // that is, not taking into account the current normal form of x, since
+      // this ensures these lemmas are still relevant after backtracking.
+      std::vector<Node> nfexp;
+      std::vector<Node> rnfexp;
+      // The normal form of x is stored in nx, while x is left unchanged.
+      Node nx = x;
+      if (!x.isConst())
+      {
+        nx = d_csolver.getNormalString(x, nfexp);
+      }
+      // If r is not a constant regular expression, we update it based on
+      // normal forms, which may concretize its variables.
+      bool changed = false;
+      if (!d_regexp_opr.checkConstRegExp(r))
+      {
+        r = getNormalSymRegExp(r, rnfexp);
+        nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
+        changed = true;
+      }
+      Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
+                                 << nx << " IN " << r << std::endl;
+      if (nx != x || changed)
+      {
+        // We rewrite the membership nx IN r.
+        Node tmp = rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
+        Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
+        if (tmp.isConst())
          {
-          // We rewrite the membership nx IN r.
-          Node tmp = rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
-          Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
-          if (tmp.isConst())
+          if (tmp.getConst<bool>() == polarity)
            {
-            if (tmp.getConst<bool>() == polarity)
-            {
-              // it is satisfied in this SAT context
-              d_regexp_ccached.insert(assertion);
-              continue;
-            }
-            else
-            {
-              // we have a conflict
-              std::vector<Node> iexp = nfexp;
-              std::vector<Node> noExplain;
-              iexp.push_back(assertion);
-              noExplain.push_back(assertion);
-              Node conc = Node::null();
-              d_im.sendInference(
-                  iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
-              addedLemma = true;
-              break;
-            }
+            // it is satisfied in this SAT context
+            d_regexp_ccached.insert(assertion);
+            continue;
+          }
+          else
+          {
+            // we have a conflict
+            std::vector<Node> iexp = nfexp;
+            std::vector<Node> noExplain;
+            iexp.push_back(assertion);
+            noExplain.push_back(assertion);
+            Node conc = Node::null();
+            d_im.sendInference(
+                iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
+            addedLemma = true;
+            break;
            }
          }
-        if (e == 1 && repUnfold.find(rep) != repUnfold.end())
-        {
-          // do not unfold negative memberships of strings that have new
-          // positive unfoldings. For example:
-          //   x in ("A")* ^ NOT x in ("B")*
-          // We unfold x = "A" ++ x' only. The intution here is that positive
-          // unfoldings lead to stronger constraints (equalities are stronger
-          // than disequalities), and are easier to check.
-          continue;
-        }
-        if (polarity)
+      }
+      if (e == 1 && repUnfold.find(rep) != repUnfold.end())
+      {
+        // do not unfold negative memberships of strings that have new
+        // positive unfoldings. For example:
+        //   x in ("A")* ^ NOT x in ("B")*
+        // We unfold x = "A" ++ x' only. The intution here is that positive
+        // unfoldings lead to stronger constraints (equalities are stronger
+        // than disequalities), and are easier to check.
+        continue;
+      }
+      bool doSimplify = true;
+      if (polarity)
+      {
+        doSimplify = checkPDerivative(x, r, atom, addedLemma, rnfexp);
+      }
+      else
+      {
+        if (!options().strings.stringExp)
          {
-          flag = checkPDerivative(x, r, atom, addedLemma, rnfexp);
+          throw LogicException(
+              "Strings Incomplete (due to Negative Membership) by default, "
+              "try --strings-exp option.");
          }
-        else
+      }
+      if (doSimplify)
+      {
+        // check if the term is atomic
+        Trace("strings-regexp")
+            << "Unroll/simplify membership of atomic term " << rep << std::endl;
+        // if so, do simple unrolling
+        Trace("strings-regexp") << "Simplify on " << atom << std::endl;
+        Node conc = d_regexp_opr.simplify(atom, polarity);
+        Trace("strings-regexp") << "...finished, got " << conc << std::endl;
+        // if simplifying successfully generated a lemma
+        if (!conc.isNull())
          {
-          if (!options().strings.stringExp)
+          std::vector<Node> iexp;
+          std::vector<Node> noExplain;
+          iexp.push_back(assertion);
+          noExplain.push_back(assertion);
+          Assert(atom.getKind() == STRING_IN_REGEXP);
+          if (polarity)
            {
-            throw LogicException(
-                "Strings Incomplete (due to Negative Membership) by default, "
-                "try --strings-exp option.");
+            d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
            }
-        }
-        if (flag)
-        {
-          // check if the term is atomic
-          Trace("strings-regexp")
-              << "Unroll/simplify membership of atomic term " << rep
-              << std::endl;
-          // if so, do simple unrolling
-          Trace("strings-regexp") << "Simplify on " << atom << std::endl;
-          Node conc = d_regexp_opr.simplify(atom, polarity);
-          Trace("strings-regexp") << "...finished, got " << conc << std::endl;
-          // if simplifying successfully generated a lemma
-          if (!conc.isNull())
+          else
            {
-            std::vector<Node> iexp;
-            std::vector<Node> noExplain;
-            iexp.push_back(assertion);
-            noExplain.push_back(assertion);
-            Assert(atom.getKind() == STRING_IN_REGEXP);
-            if (polarity)
-            {
-              d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
-            }
-            else
-            {
-              d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
-            }
-            InferenceId inf =
-                polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG;
-            // in very rare cases, we may find out that the unfolding lemma
-            // for a membership is equivalent to true, in spite of the RE
-            // not being rewritten to true.
-            if (d_im.sendInference(iexp, noExplain, conc, inf))
-            {
-              addedLemma = true;
-              if (e == 0)
-              {
-                // Remember that we have unfolded a membership for x
-                // notice that we only do this here, after we have definitely
-                // added a lemma.
-                repUnfold.insert(rep);
-              }
-            }
-            processed.push_back(assertion);
+            d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
            }
-          else
+          InferenceId inf = polarity ? InferenceId::STRINGS_RE_UNFOLD_POS
+                                     : InferenceId::STRINGS_RE_UNFOLD_NEG;
+          // in very rare cases, we may find out that the unfolding lemma
+          // for a membership is equivalent to true, in spite of the RE
+          // not being rewritten to true.
+          if (d_im.sendInference(iexp, noExplain, conc, inf))
            {
-            // otherwise we are incomplete
-            d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
+            addedLemma = true;
+            if (e == 0)
+            {
+              // Remember that we have unfolded a membership for x
+              // notice that we only do this here, after we have definitely
+              // added a lemma.
+              repUnfold.insert(rep);
+            }
            }
+          processed.push_back(assertion);
          }
-        if (d_state.isInConflict())
+        else
          {
-          break;
+          // otherwise we are incomplete
+          d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
          }
        }
+      if (d_state.isInConflict())
+      {
+        break;
+      }
      }
    }
+
    if (addedLemma)
    {
      if (!d_state.isInConflict())
      {
-      for (unsigned i = 0; i < processed.size(); i++)
+      for (const Node& p : processed)
        {
          Trace("strings-regexp")
-            << "...add " << processed[i] << " to u-cache." << std::endl;
-        d_regexp_ucached.insert(processed[i]);
+            << "...add " << p << " to u-cache." << std::endl;
+        d_regexp_ucached.insert(p);
        }
      }
    }
@@ -350,27 +376,48 @@ bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
        bool m2Neg = m2.getKind() == NOT;
        Node m2Lit = m2Neg ? m2[0] : m2;
  
-      // Both regular expression memberships have the same polarity
        if (m1Neg == m2Neg)
        {
-        if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
+        // Check whether the RE in membership m1 contains the one in m2, if
+        // so then m1 can be marked reduced if positive polarity, m2 if
+        // negative polarity.
+        // Notice that we do not do this if the non-reduced membership has
+        // already been unfolded, since memberships may reduce to other
+        // memberships that are included in the original, thus making the
+        // justification for the reduction cyclic.  For example, to reduce:
+        //  (not (str.in_re x (re.++ (re.* R1) R2)))
+        // We may rely on justifying this by the fact that (writing x[i:j] for
+        // substring) either:
+        //  (not (str.in_re x[:0] (re.* R1)))
+        //  (not (str.in_re x[0:] R2))
+        // The first is trivially satisfied, the second is equivalent to
+        //  (not (str.in_re x R2))
+        // where R2 is included in (re.++ (re.* R1) R2)). However, we cannot
+        // mark the latter as reduced.
+        bool basisUnfolded =
+            d_regexp_ucached.find(m1Neg ? m1 : m2) != d_regexp_ucached.end();
+        if (!basisUnfolded)
          {
-          if (m1Neg)
-          {
-            // ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
-            //   mark ~str.in.re(x, R2) as reduced
-            d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
-            remove.insert(m2);
-          }
-          else
+          // Both regular expression memberships have positive polarity
+          if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
            {
-            // str.in.re(x, R1) includes str.in.re(x, R2) --->
-            //   mark str.in.re(x, R1) as reduced
-            d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
-            remove.insert(m1);
+            if (m1Neg)
+            {
+              // ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
+              //   mark ~str.in.re(x, R2) as reduced
+              d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
+              remove.insert(m2);
+            }
+            else
+            {
+              // str.in.re(x, R1) includes str.in.re(x, R2) --->
+              //   mark str.in.re(x, R1) as reduced
+              d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
+              remove.insert(m1);
  
-            // We don't need to process m1 anymore
-            break;
+              // We don't need to process m1 anymore
+              break;
+            }
            }
          }
        }
diff --git a/src/theory/strings/regexp_solver.h b/src/theory/strings/regexp_solver.h

index d6c7f517b9aeb56c221406873bf739db19686529..f60d104f1da6fd71306ea87fc5010f550f858140 100644 (file)
--- a/src/theory/strings/regexp_solver.h
+++ b/src/theory/strings/regexp_solver.h
@@ -65,7 +65,7 @@ class RegExpSolver : protected EnvObj
     * for Regular Membership and Length Constraints over Unbounded Strings",
     * FroCoS 2015.
     */
-  void checkMemberships();
+  void checkMemberships(int effort);
  
   private:
    /** check
@@ -78,8 +78,15 @@ class RegExpSolver : protected EnvObj
     * The argument mems maps representative string terms r to memberships of the
     * form (t in R) or ~(t in R), where t = r currently holds in the equality
     * engine of the theory of strings.
+   *
+   * We check in two phases:
+   * (1) checkInclInter which checks if there are conflicts due to quick
+   * inclusion/intersection testing. This method returns true if a conflict is
+   * discovered.
+   * (2) checkUnfold, which unfolds regular expression memberships as necessary
     */
-  void check(const std::map<Node, std::vector<Node>>& mems);
+  bool checkInclInter(const std::map<Node, std::vector<Node>>& mems);
+  void checkUnfold(const std::map<Node, std::vector<Node>>& mems, int effort);
    /**
     * Check memberships in equivalence class for regular expression
     * inclusion.
diff --git a/src/theory/strings/strategy.cpp b/src/theory/strings/strategy.cpp

index 3bd3ef6a0f0c49334aa4f84ccd529544ee37c2b9..7338b99fd6bc56ef24c8ae580614f9cee1885275 100644 (file)
--- a/src/theory/strings/strategy.cpp
+++ b/src/theory/strings/strategy.cpp
@@ -130,19 +130,22 @@ void Strategy::initializeStrategy()
      {
        addStrategyStep(CHECK_LENGTH_EQC);
      }
-    if (options::stringExp() && !options::stringGuessModel())
+    if (options::stringExp())
      {
        addStrategyStep(CHECK_EXTF_REDUCTION, 2);
      }
      addStrategyStep(CHECK_MEMBERSHIP);
      addStrategyStep(CHECK_CARDINALITY);
      step_end[Theory::EFFORT_FULL] = d_infer_steps.size() - 1;
-    if (options::stringExp() && options::stringGuessModel())
+    if (options::stringModelBasedReduction())
      {
        step_begin[Theory::EFFORT_LAST_CALL] = d_infer_steps.size();
-      // these two steps are run in parallel
-      addStrategyStep(CHECK_EXTF_REDUCTION, 2, false);
        addStrategyStep(CHECK_EXTF_EVAL, 3);
+      if (options::stringExp())
+      {
+        addStrategyStep(CHECK_EXTF_REDUCTION, 3);
+      }
+      addStrategyStep(CHECK_MEMBERSHIP, 3);
        step_end[Theory::EFFORT_LAST_CALL] = d_infer_steps.size() - 1;
      }
      // set the beginning/ending ranges
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp

index caeb8065e2e3ca552be5fe80ea64421069d203df..7e79190e6ea3da9501efe4949666c3a9e417039c 100644 (file)
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -149,6 +149,9 @@ void TheoryStrings::finishInit()
    d_equalityEngine->addFunctionKind(kind::STRING_TOLOWER, eagerEval);
    d_equalityEngine->addFunctionKind(kind::STRING_TOUPPER, eagerEval);
    d_equalityEngine->addFunctionKind(kind::STRING_REV, eagerEval);
+
+  // memberships are not relevant for model building
+  d_valuation.setIrrelevantKind(kind::STRING_IN_REGEXP);
  }
  
  std::string TheoryStrings::identify() const
@@ -723,9 +726,12 @@ void TheoryStrings::postCheck(Effort e)
  }
  
  bool TheoryStrings::needsCheckLastEffort() {
-  if (options().strings.stringGuessModel)
+  if (options().strings.stringModelBasedReduction)
    {
-    return d_esolver.hasExtendedFunctions();
+    bool hasExtf = d_esolver.hasExtendedFunctions();
+    Trace("strings-process")
+        << "needsCheckLastEffort: hasExtf = " << hasExtf << std::endl;
+    return hasExtf;
    }
    return false;
  }
@@ -1115,7 +1121,7 @@ void TheoryStrings::runInferStep(InferStep s, int effort)
      case CHECK_LENGTH_EQC: d_csolver.checkLengthsEqc(); break;
      case CHECK_REGISTER_TERMS_NF: checkRegisterTermsNormalForms(); break;
      case CHECK_EXTF_REDUCTION: d_esolver.checkExtfReductions(effort); break;
-    case CHECK_MEMBERSHIP: d_rsolver.checkMemberships(); break;
+    case CHECK_MEMBERSHIP: d_rsolver.checkMemberships(effort); break;
      case CHECK_CARDINALITY: d_bsolver.checkCardinality(); break;
      default: Unreachable(); break;
    }
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Tue, 23 Nov 2021 22:47:09 +0000 (16:47 -0600)
committer	GitHub <noreply@github.com>
	Tue, 23 Nov 2021 22:47:09 +0000 (22:47 +0000)
src/options/strings_options.toml		patch \| blob \| history
src/theory/strings/extf_solver.cpp		patch \| blob \| history
src/theory/strings/extf_solver.h		patch \| blob \| history
src/theory/strings/regexp_solver.cpp		patch \| blob \| history
src/theory/strings/regexp_solver.h		patch \| blob \| history
src/theory/strings/strategy.cpp		patch \| blob \| history
src/theory/strings/theory_strings.cpp		patch \| blob \| history