Add regular expression elimination module (#2400)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Thu, 30 Aug 2018 16:57:58 +0000 (11:57 -0500)

committer GitHub <noreply@github.com>

Thu, 30 Aug 2018 16:57:58 +0000 (11:57 -0500)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Thu, 30 Aug 2018 16:57:58 +0000 (11:57 -0500)
committer GitHub <noreply@github.com>
Thu, 30 Aug 2018 16:57:58 +0000 (11:57 -0500)
diff --git a/src/Makefile.am b/src/Makefile.am

index 7231820f810e2750e8810d88703b4fc5af737fa5..1a55d38101a09f5b05f1e335a0404ddea501a680 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -582,6 +582,8 @@ libcvc4_la_SOURCES = \
         theory/sets/theory_sets_rewriter.h \
         theory/sets/theory_sets_type_enumerator.h \
         theory/sets/theory_sets_type_rules.h \
+       theory/strings/regexp_elim.cpp \
+       theory/strings/regexp_elim.h \
         theory/strings/regexp_operation.cpp \
         theory/strings/regexp_operation.h \
         theory/strings/theory_strings.cpp \
diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml

index 3498b61837a715d2a40525defc0c9214b5b37ff9..c48583bb2fe2655fd2ea35b16baaca2e440c5474 100644 (file)
--- a/src/options/strings_options.toml
+++ b/src/options/strings_options.toml
@@ -209,6 +209,22 @@ header = "options/strings_options.h"
    read_only  = true
    help       = "do length propagation based on constant splits"
  
+[[option]]
+  name       = "regExpElim"
+  category   = "regular"
+  long       = "re-elim"
+  type       = "bool"
+  default    = "false"
+  help       = "elimination techniques for regular expressions"
+
+[[option]]
+  name       = "regExpElimAgg"
+  category   = "regular"
+  long       = "re-elim-agg"
+  type       = "bool"
+  default    = "false"
+  help       = "aggressive elimination techniques for regular expressions"
+
  [[option]]
    name       = "stringFlatForms"
    category   = "regular"
diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp

new file mode 100644 (file)

index 0000000..7d65a87
--- /dev/null
+++ b/src/theory/strings/regexp_elim.cpp
@@ -0,0 +1,446 @@
+/*********************                                                        */
+/*! \file regexp_elim.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of techniques for eliminating regular expressions
+ **
+ **/
+
+#include "theory/strings/regexp_elim.h"
+
+#include "options/strings_options.h"
+#include "theory/strings/theory_strings_rewriter.h"
+
+using namespace CVC4;
+using namespace CVC4::kind;
+using namespace CVC4::theory;
+using namespace CVC4::theory::strings;
+
+RegExpElimination::RegExpElimination()
+{
+  d_zero = NodeManager::currentNM()->mkConst(Rational(0));
+  d_one = NodeManager::currentNM()->mkConst(Rational(1));
+  d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1));
+}
+
+Node RegExpElimination::eliminate(Node atom)
+{
+  Assert(atom.getKind() == STRING_IN_REGEXP);
+  if (atom[1].getKind() == REGEXP_CONCAT)
+  {
+    return eliminateConcat(atom);
+  }
+  else if (atom[1].getKind() == REGEXP_STAR)
+  {
+    return eliminateStar(atom);
+  }
+  return Node::null();
+}
+
+Node RegExpElimination::eliminateConcat(Node atom)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  Node x = atom[0];
+  Node lenx = nm->mkNode(STRING_LENGTH, x);
+  Node re = atom[1];
+  // memberships of the form x in re.++ * s1 * ... * sn *, where * are
+  // any number of repetitions (exact or indefinite) of re.allchar.
+  Trace("re-elim-debug") << "Try re concat with gaps " << atom << std::endl;
+  std::vector<Node> children;
+  TheoryStringsRewriter::getConcat(re, children);
+  bool onlySigmasAndConsts = true;
+  std::vector<Node> sep_children;
+  std::vector<unsigned> gap_minsize;
+  std::vector<bool> gap_exact;
+  // the first gap is initially strict zero
+  gap_minsize.push_back(0);
+  gap_exact.push_back(true);
+  for (const Node& c : children)
+  {
+    Trace("re-elim-debug") << "  " << c << std::endl;
+    onlySigmasAndConsts = false;
+    if (c.getKind() == STRING_TO_REGEXP)
+    {
+      onlySigmasAndConsts = true;
+      sep_children.push_back(c[0]);
+      // the next gap is initially strict zero
+      gap_minsize.push_back(0);
+      gap_exact.push_back(true);
+    }
+    else if (c.getKind() == REGEXP_STAR && c[0].getKind() == REGEXP_SIGMA)
+    {
+      // found a gap of any size
+      onlySigmasAndConsts = true;
+      gap_exact[gap_exact.size() - 1] = false;
+    }
+    else if (c.getKind() == REGEXP_SIGMA)
+    {
+      // add one to the minimum size of the gap
+      onlySigmasAndConsts = true;
+      gap_minsize[gap_minsize.size() - 1]++;
+    }
+    if (!onlySigmasAndConsts)
+    {
+      Trace("re-elim-debug") << "...cannot handle " << c << std::endl;
+      break;
+    }
+  }
+  // we should always rewrite concatenations that are purely re.allchar
+  // and re.*( re.allchar ).
+  Assert(!onlySigmasAndConsts || !sep_children.empty());
+  if (onlySigmasAndConsts && !sep_children.empty())
+  {
+    bool canProcess = true;
+    std::vector<Node> conj;
+    // The following constructs a set of constraints that encodes that a
+    // set of string terms are found, in order, in string x.
+    // prev_end stores the current (symbolic) index in x that we are
+    // searching.
+    Node prev_end = d_zero;
+    unsigned gap_minsize_end = gap_minsize.back();
+    bool gap_exact_end = gap_exact.back();
+    std::vector<Node> non_greedy_find_vars;
+    for (unsigned i = 0, size = sep_children.size(); i < size; i++)
+    {
+      Node sc = sep_children[i];
+      if (gap_minsize[i] > 0)
+      {
+        // the gap to this child is at least gap_minsize[i]
+        prev_end =
+            nm->mkNode(PLUS, prev_end, nm->mkConst(Rational(gap_minsize[i])));
+      }
+      Node lensc = nm->mkNode(STRING_LENGTH, sc);
+      if (gap_exact[i])
+      {
+        // if the gap is exact, it is a substring constraint
+        Node curr = prev_end;
+        Node ss = nm->mkNode(STRING_SUBSTR, x, curr, lensc);
+        conj.push_back(ss.eqNode(sc));
+        prev_end = nm->mkNode(PLUS, curr, lensc);
+      }
+      else
+      {
+        // otherwise, we can use indexof to represent some next occurrence
+        if (gap_exact[i + 1] && i + 1 != size)
+        {
+          if (!options::regExpElimAgg())
+          {
+            canProcess = false;
+            break;
+          }
+          // if the gap after this one is strict, we need a non-greedy find
+          // thus, we add a symbolic constant
+          Node k = nm->mkBoundVar(nm->integerType());
+          non_greedy_find_vars.push_back(k);
+          prev_end = nm->mkNode(PLUS, prev_end, k);
+        }
+        Node curr = nm->mkNode(STRING_STRIDOF, x, sc, prev_end);
+        Node idofFind = curr.eqNode(d_neg_one).negate();
+        conj.push_back(idofFind);
+        prev_end = nm->mkNode(PLUS, curr, lensc);
+      }
+    }
+
+    if (canProcess)
+    {
+      // since sep_children is non-empty, conj is non-empty
+      Assert(!conj.empty());
+      // Process the last gap, if necessary.
+      // Notice that if the last gap is not exact and its minsize is zero,
+      // then the last indexof/substr constraint entails the following
+      // constraint, so it is not necessary to add.
+      if (gap_minsize_end > 0 || gap_exact_end)
+      {
+        Node fit = nm->mkNode(
+            gap_exact_end ? EQUAL : LEQ,
+            nm->mkNode(PLUS, prev_end, nm->mkConst(Rational(gap_minsize_end))),
+            lenx);
+        conj.push_back(fit);
+      }
+      Node res = conj.size() == 1 ? conj[0] : nm->mkNode(AND, conj);
+      // process the non-greedy find variables
+      if (!non_greedy_find_vars.empty())
+      {
+        std::vector<Node> children;
+        for (const Node& v : non_greedy_find_vars)
+        {
+          Node bound = nm->mkNode(
+              AND, nm->mkNode(LEQ, d_zero, v), nm->mkNode(LT, v, lenx));
+          children.push_back(bound);
+        }
+        children.push_back(res);
+        Node body = nm->mkNode(AND, children);
+        Node bvl = nm->mkNode(BOUND_VAR_LIST, non_greedy_find_vars);
+        res = nm->mkNode(EXISTS, bvl, body);
+      }
+      // e.g., writing "A" for (str.to.re "A") and _ for re.allchar:
+      //   x in (re.++ "A" (re.* _) "B" (re.* _)) --->
+      //     substr(x,0,1)="A" ^ indexof(x,"B",1)!=-1
+      //   x in (re.++ (re.* _) "A" _ _ _ (re.* _) "B" _ _ (re.* _)) --->
+      //     indexof(x,"A",0)!=-1 ^
+      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 ) != -1 ^
+      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x)
+
+      // An example of a non-greedy find:
+      //   x in re.++( re.*( _ ), "A", _, "B", re.*( _ ) ) --->
+      //     exists k. 0 <= k < len( x ) ^
+      //               indexof( x, "A", k ) != -1 ^
+      //               substr( x, indexof( x, "A", k )+2, 1 ) = "B"
+      return returnElim(atom, res, "concat-with-gaps");
+    }
+  }
+
+  if (!options::regExpElimAgg())
+  {
+    return Node::null();
+  }
+  // only aggressive rewrites below here
+
+  // if the first or last child is constant string, we can split the membership
+  // into a conjunction of two memberships.
+  Node sStartIndex = d_zero;
+  Node sLength = lenx;
+  std::vector<Node> sConstraints;
+  std::vector<Node> rexpElimChildren;
+  unsigned nchildren = children.size();
+  Assert(nchildren > 1);
+  for (unsigned r = 0; r < 2; r++)
+  {
+    unsigned index = r == 0 ? 0 : nchildren - 1;
+    Assert(children[index + (r == 0 ? 1 : -1)].getKind() != STRING_TO_REGEXP);
+    Node c = children[index];
+    if (c.getKind() == STRING_TO_REGEXP)
+    {
+      Node s = c[0];
+      Node lens = nm->mkNode(STRING_LENGTH, s);
+      Node sss = r == 0 ? d_zero : nm->mkNode(MINUS, lenx, lens);
+      Node ss = nm->mkNode(STRING_SUBSTR, x, sss, lens);
+      sConstraints.push_back(ss.eqNode(s));
+      if (r == 0)
+      {
+        sStartIndex = lens;
+      }
+      sLength = nm->mkNode(MINUS, sLength, lens);
+    }
+    if (r == 1 && !sConstraints.empty())
+    {
+      // add the middle children
+      for (unsigned i = 1; i < (nchildren - 1); i++)
+      {
+        rexpElimChildren.push_back(children[i]);
+      }
+    }
+    if (c.getKind() != STRING_TO_REGEXP)
+    {
+      rexpElimChildren.push_back(c);
+    }
+  }
+  Assert(rexpElimChildren.size() + sConstraints.size() == nchildren);
+  if (!sConstraints.empty())
+  {
+    Node ss = nm->mkNode(STRING_SUBSTR, x, sStartIndex, sLength);
+    Assert(!rexpElimChildren.empty());
+    Node regElim =
+        TheoryStringsRewriter::mkConcat(REGEXP_CONCAT, rexpElimChildren);
+    sConstraints.push_back(nm->mkNode(STRING_IN_REGEXP, ss, regElim));
+    Node ret = nm->mkNode(AND, sConstraints);
+    // e.g.
+    // x in re.++( "A", R ) ---> substr(x,0,1)="A" ^ substr(x,1,len(x)-1) in R
+    return returnElim(atom, ret, "concat-splice");
+  }
+  Assert(nchildren > 1);
+  for (unsigned i = 0; i < nchildren; i++)
+  {
+    if (children[i].getKind() == STRING_TO_REGEXP)
+    {
+      Node s = children[i][0];
+      Node lens = nm->mkNode(STRING_LENGTH, s);
+      // there exists an index in this string such that the substring is this
+      Node k;
+      std::vector<Node> echildren;
+      if (i == 0)
+      {
+        k = d_zero;
+      }
+      else if (i + 1 == nchildren)
+      {
+        k = nm->mkNode(MINUS, lenx, lens);
+      }
+      else
+      {
+        k = nm->mkBoundVar(nm->integerType());
+        Node bound =
+            nm->mkNode(AND,
+                       nm->mkNode(LEQ, d_zero, k),
+                       nm->mkNode(LT, k, nm->mkNode(MINUS, lenx, lens)));
+        echildren.push_back(bound);
+      }
+      Node substrEq = nm->mkNode(STRING_SUBSTR, x, k, lens).eqNode(s);
+      echildren.push_back(substrEq);
+      if (i > 0)
+      {
+        std::vector<Node> rprefix;
+        rprefix.insert(rprefix.end(), children.begin(), children.begin() + i);
+        Node rpn = TheoryStringsRewriter::mkConcat(REGEXP_CONCAT, rprefix);
+        Node substrPrefix = nm->mkNode(
+            STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, d_zero, k), rpn);
+        echildren.push_back(substrPrefix);
+      }
+      if (i + 1 < nchildren)
+      {
+        std::vector<Node> rsuffix;
+        rsuffix.insert(rsuffix.end(), children.begin() + i + 1, children.end());
+        Node rps = TheoryStringsRewriter::mkConcat(REGEXP_CONCAT, rsuffix);
+        Node ks = nm->mkNode(PLUS, k, lens);
+        Node substrSuffix = nm->mkNode(
+            STRING_IN_REGEXP,
+            nm->mkNode(STRING_SUBSTR, x, ks, nm->mkNode(MINUS, lenx, ks)),
+            rps);
+        echildren.push_back(substrSuffix);
+      }
+      Node body = nm->mkNode(AND, echildren);
+      if (k.getKind() == BOUND_VARIABLE)
+      {
+        Node bvl = nm->mkNode(BOUND_VAR_LIST, k);
+        body = nm->mkNode(EXISTS, bvl, body);
+      }
+      // e.g. x in re.++( R1, "AB", R2 ) --->
+      //  exists k.
+      //    0 <= k <= (len(x)-2) ^
+      //    substr( x, k, 2 ) = "AB" ^
+      //    substr( x, 0, k ) in R1 ^
+      //    substr( x, k+2, len(x)-(k+2) ) in R2
+      return returnElim(atom, body, "concat-find");
+    }
+  }
+  return Node::null();
+}
+
+Node RegExpElimination::eliminateStar(Node atom)
+{
+  if (!options::regExpElimAgg())
+  {
+    return Node::null();
+  }
+  // only aggressive rewrites below here
+
+  NodeManager* nm = NodeManager::currentNM();
+  Node x = atom[0];
+  Node lenx = nm->mkNode(STRING_LENGTH, x);
+  Node re = atom[1];
+  // for regular expression star,
+  // if the period is a fixed constant, we can turn it into a bounded
+  // quantifier
+  std::vector<Node> disj;
+  if (re[0].getKind() == REGEXP_UNION)
+  {
+    for (const Node& r : re[0])
+    {
+      disj.push_back(r);
+    }
+  }
+  else
+  {
+    disj.push_back(re[0]);
+  }
+  bool lenOnePeriod = true;
+  std::vector<Node> char_constraints;
+  Node index = nm->mkBoundVar(nm->integerType());
+  Node substr_ch = nm->mkNode(STRING_SUBSTR, x, index, d_one);
+  substr_ch = Rewriter::rewrite(substr_ch);
+  // handle the case where it is purely characters
+  for (const Node& r : disj)
+  {
+    Assert(r.getKind() != REGEXP_UNION);
+    Assert(r.getKind() != REGEXP_SIGMA);
+    lenOnePeriod = false;
+    // lenOnePeriod is true if this regular expression is a single character
+    // regular expression
+    if (r.getKind() == STRING_TO_REGEXP)
+    {
+      Node s = r[0];
+      if (s.isConst() && s.getConst<String>().size() == 1)
+      {
+        lenOnePeriod = true;
+      }
+    }
+    else if (r.getKind() == REGEXP_RANGE)
+    {
+      lenOnePeriod = true;
+    }
+    if (!lenOnePeriod)
+    {
+      break;
+    }
+    else
+    {
+      Node regexp_ch = nm->mkNode(STRING_IN_REGEXP, substr_ch, r);
+      regexp_ch = Rewriter::rewrite(regexp_ch);
+      Assert(regexp_ch.getKind() != STRING_IN_REGEXP);
+      char_constraints.push_back(regexp_ch);
+    }
+  }
+  if (lenOnePeriod)
+  {
+    Assert(!char_constraints.empty());
+    Node bound = nm->mkNode(
+        AND, nm->mkNode(LEQ, d_zero, index), nm->mkNode(LT, index, lenx));
+    Node conc = char_constraints.size() == 1 ? char_constraints[0]
+                                             : nm->mkNode(OR, char_constraints);
+    Node body = nm->mkNode(OR, bound.negate(), conc);
+    Node bvl = nm->mkNode(BOUND_VAR_LIST, index);
+    Node res = nm->mkNode(FORALL, bvl, body);
+    // e.g.
+    //   x in (re.* (re.union "A" "B" )) --->
+    //   forall k. 0<=k<len(x) => (substr(x,k,1) in "A" OR substr(x,k,1) in "B")
+    return returnElim(atom, res, "star-char");
+  }
+  // otherwise, for stars of constant length these are periodic
+  if (disj.size() == 1)
+  {
+    Node r = disj[0];
+    if (r.getKind() == STRING_TO_REGEXP)
+    {
+      Node s = r[0];
+      if (s.isConst())
+      {
+        Node lens = nm->mkNode(STRING_LENGTH, s);
+        lens = Rewriter::rewrite(lens);
+        Assert(lens.isConst());
+        std::vector<Node> conj;
+        Node bound = nm->mkNode(
+            AND,
+            nm->mkNode(LEQ, d_zero, index),
+            nm->mkNode(LT, index, nm->mkNode(INTS_DIVISION, lenx, lens)));
+        Node conc =
+            nm->mkNode(STRING_SUBSTR, x, nm->mkNode(MULT, index, lens), lens)
+                .eqNode(s);
+        Node body = nm->mkNode(OR, bound.negate(), conc);
+        Node bvl = nm->mkNode(BOUND_VAR_LIST, index);
+        Node res = nm->mkNode(FORALL, bvl, body);
+        res = nm->mkNode(
+            AND, nm->mkNode(INTS_MODULUS, lenx, lens).eqNode(d_zero), res);
+        // e.g.
+        //    x in ("abc")* --->
+        //    forall k. 0 <= k < (len( x ) div 3) => substr(x,3*k,3) = "abc" ^
+        //    len(x) mod 3 = 0
+        return returnElim(atom, res, "star-constant");
+      }
+    }
+  }
+  return Node::null();
+}
+
+Node RegExpElimination::returnElim(Node atom, Node atomElim, const char* id)
+{
+  Trace("re-elim") << "re-elim: " << atom << " to " << atomElim << " by " << id
+                   << "." << std::endl;
+  return atomElim;
+}
diff --git a/src/theory/strings/regexp_elim.h b/src/theory/strings/regexp_elim.h

new file mode 100644 (file)

index 0000000..eddf33e
--- /dev/null
+++ b/src/theory/strings/regexp_elim.h
@@ -0,0 +1,66 @@
+/*********************                                                        */
+/*! \file regexp_elim.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Techniques for eliminating regular expressions
+ **
+ **/
+
+#include "cvc4_private.h"
+#ifndef __CVC4__THEORY__STRINGS__REGEXP_ELIM_H
+#define __CVC4__THEORY__STRINGS__REGEXP_ELIM_H
+
+#include "expr/node.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+/** Regular expression membership elimination
+ *
+ * This class implements techniques for reducing regular expression memberships
+ * to bounded integer quantifiers + extended function constraints.
+ *
+ * It is used by TheoryStrings during ppRewrite.
+ */
+class RegExpElimination
+{
+ public:
+  RegExpElimination();
+  /** eliminate membership
+   *
+   * This method takes as input a regular expression membership atom of the
+   * form (str.in.re x R). If this method returns a non-null node ret, then ret
+   * is equivalent to atom.
+   */
+  Node eliminate(Node atom);
+
+ private:
+  /** common terms */
+  Node d_zero;
+  Node d_one;
+  Node d_neg_one;
+  /** return elimination
+   *
+   * This method is called when atom is rewritten to atomElim, and returns
+   * atomElim. id is an identifier indicating the reason for the elimination.
+   */
+  Node returnElim(Node atom, Node atomElim, const char* id);
+  /** elimination for regular expression concatenation */
+  Node eliminateConcat(Node atom);
+  /** elimination for regular expression star */
+  Node eliminateStar(Node atom);
+}; /* class RegExpElimination */
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
+
+#endif /* __CVC4__THEORY__STRINGS__REGEXP_ELIM_H */
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp

index 72e82edcaf6cabbc2a554822dc335e2e8e50ecbd..35835398c6be1a672963c86c75f8470f8ab16592 100644 (file)
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -793,7 +793,6 @@ Node TheoryStrings::expandDefinition(LogicRequest &logicRequest, Node node) {
    return node;
  }
  
-
  void TheoryStrings::check(Effort e) {
    if (done() && e<EFFORT_FULL) {
      return;
@@ -4301,6 +4300,19 @@ Node TheoryStrings::getNextDecisionRequest( unsigned& priority ) {
  
  Node TheoryStrings::ppRewrite(TNode atom) {
    Trace("strings-ppr") << "TheoryStrings::ppRewrite " << atom << std::endl;
+  Node atomElim;
+  if (options::regExpElim() && atom.getKind() == STRING_IN_REGEXP)
+  {
+    // aggressive elimination of regular expression membership
+    atomElim = d_regexp_elim.eliminate(atom);
+    if (!atomElim.isNull())
+    {
+      Trace("strings-ppr") << "  rewrote " << atom << " -> " << atomElim
+                           << " via regular expression elimination."
+                           << std::endl;
+      atom = atomElim;
+    }
+  }
    if( !options::stringLazyPreproc() ){
      //eager preprocess here
      std::vector< Node > new_nodes;
diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h

index 2c0cb42aafddc76fba000b3ede65eecd22683bd4..c1bb1e0a067ad6e9eee0aa2a89bfae3d71854b7b 100644 (file)
--- a/src/theory/strings/theory_strings.h
+++ b/src/theory/strings/theory_strings.h
@@ -22,6 +22,7 @@
  #include "context/cdhashset.h"
  #include "context/cdlist.h"
  #include "expr/attribute.h"
+#include "theory/strings/regexp_elim.h"
  #include "theory/strings/regexp_operation.h"
  #include "theory/strings/theory_strings_preprocess.h"
  #include "theory/theory.h"
@@ -618,10 +619,10 @@ private:
    NodeSet d_processed_memberships;
    // antecedant for why regexp membership must be true
    NodeNodeMap d_regexp_ant;
-  // membership length
-  //std::map< Node, bool > d_membership_length;
-  // regular expression operations
+  /** regular expression operation module */
    RegExpOpr d_regexp_opr;
+  /** regular expression elimination module */
+  RegExpElimination d_regexp_elim;
  
    CVC4::String getHeadConst( Node x );
    bool deriveRegExp( Node x, Node r, Node ant );
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp

index 1c68097ae1de16e664b0988895ba741d9b7ac0e2..354115850326d87cbd5dadbffc104c939cf7a069 100644 (file)
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -917,24 +917,45 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) {
      }
    }else if( r.getKind() == kind::REGEXP_CONCAT ){
      bool allSigma = true;
+    bool allSigmaStrict = true;
+    unsigned allSigmaMinSize = 0;
      bool allString = true;
      std::vector< Node > cc;
-    for(unsigned i=0; i<r.getNumChildren(); i++) {
-      Assert( r[i].getKind() != kind::REGEXP_EMPTY );
-      if( r[i].getKind() != kind::REGEXP_SIGMA ){
+    for (const Node& rc : r)
+    {
+      Assert(rc.getKind() != kind::REGEXP_EMPTY);
+      if (rc.getKind() == kind::REGEXP_SIGMA)
+      {
+        allSigmaMinSize++;
+      }
+      else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA)
+      {
+        allSigmaStrict = false;
+      }
+      else
+      {
          allSigma = false;
        }
-      if( r[i].getKind() != kind::STRING_TO_REGEXP ){
+      if (rc.getKind() != kind::STRING_TO_REGEXP)
+      {
          allString = false;
-      }else{
-        cc.push_back( r[i] );
+      }
+      else
+      {
+        cc.push_back(rc);
        }
      }
-    if( allSigma ){
-      Node num = NodeManager::currentNM()->mkConst( ::CVC4::Rational( r.getNumChildren() ) );
-      retNode = num.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x));
-    }else if( allString ){
-      retNode = x.eqNode( mkConcat( kind::STRING_CONCAT, cc ) );
+    if (allSigma)
+    {
+      Node num = nm->mkConst(Rational(allSigmaMinSize));
+      Node lenx = nm->mkNode(STRING_LENGTH, x);
+      retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num);
+      return returnRewrite(node, retNode, "re-concat-pure-allchar");
+    }
+    else if (allString)
+    {
+      retNode = x.eqNode(mkConcat(STRING_CONCAT, cc));
+      return returnRewrite(node, retNode, "re-concat-pure-str");
      }
    }else if( r.getKind()==kind::REGEXP_INTER || r.getKind()==kind::REGEXP_UNION ){
      std::vector< Node > mvec;
diff --git a/test/regress/Makefile.tests b/test/regress/Makefile.tests

index 6187cb2fa157ca938ba0d7852790d2409cea7d34..63593c662fad3cc5523c6013324f40535399c066 100644 (file)
--- a/test/regress/Makefile.tests
+++ b/test/regress/Makefile.tests
@@ -814,6 +814,7 @@ REG0_TESTS = \
         regress0/strings/norn-31.smt2 \
         regress0/strings/norn-simp-rew.smt2 \
         regress0/strings/repl-rewrites2.smt2 \
+       regress0/strings/rewrites-re-concat.smt2 \
         regress0/strings/rewrites-v2.smt2 \
         regress0/strings/std2.6.1.smt2 \
         regress0/strings/str003.smt2 \
@@ -1485,6 +1486,7 @@ REG1_TESTS = \
         regress1/strings/idof-triv.smt2 \
         regress1/strings/ilc-l-nt.smt2 \
         regress1/strings/issue1105.smt2 \
+       regress1/strings/issue1684-regex.smt2 \
         regress1/strings/issue2060.smt2 \
         regress1/strings/kaluza-fl.smt2 \
         regress1/strings/loop002.smt2 \
@@ -1496,12 +1498,16 @@ REG1_TESTS = \
         regress1/strings/loop008.smt2 \
         regress1/strings/loop009.smt2 \
         regress1/strings/nf-ff-contains-abs.smt2 \
+       regress1/strings/non_termination_regular_expression4.smt2 \
+       regress1/strings/norn-13.smt2 \
         regress1/strings/norn-360.smt2 \
         regress1/strings/norn-ab.smt2 \
         regress1/strings/norn-nel-bug-052116.smt2 \
         regress1/strings/norn-simp-rew-sat.smt2 \
         regress1/strings/nterm-re-inter-sigma.smt2 \
         regress1/strings/pierre150331.smt2 \
+       regress1/strings/policy_variable.smt2 \
+       regress1/strings/re-elim-exact.smt2 \
         regress1/strings/re-unsound-080718.smt2 \
         regress1/strings/regexp001.smt2 \
         regress1/strings/regexp002.smt2 \
diff --git a/test/regress/regress0/strings/rewrites-re-concat.smt2 b/test/regress/regress0/strings/rewrites-re-concat.smt2

new file mode 100644 (file)

index 0000000..fe6691e
--- /dev/null
+++ b/test/regress/regress0/strings/rewrites-re-concat.smt2
@@ -0,0 +1,21 @@
+(set-info :smt-lib-version 2.5)
+(set-logic SLIA)
+(set-info :status unsat)
+
+(declare-fun x () String)
+(assert (str.in.re x (re.++ (str.to.re "baa") (re.* (str.to.re "a")) (re.* (str.to.re "c")))))
+
+(declare-fun y () String)
+(assert (< (str.len y) 2))
+
+(assert (or
+(not (str.in.re x (re.++ (str.to.re "baa") (re.* (str.to.re "a")) (re.* (str.to.re "a")) (re.* (str.to.re "c")))))
+(not (str.in.re x (re.++ (str.to.re "ba") (str.to.re "") (re.* (str.to.re "a")) (str.to.re "a") (re.* (str.to.re "c")))))
+(not (str.in.re x (re.++ (str.to.re "b") (re.* (str.to.re "a")) (str.to.re "a") (re.* (str.to.re "a")) (str.to.re "a") (re.* (str.to.re "c")))))
+
+(str.in.re y (re.++ re.allchar re.allchar (re.* re.allchar) re.allchar))
+(str.in.re y (re.++ re.allchar re.allchar re.allchar))
+)
+)
+
+(check-sat)
diff --git a/test/regress/regress1/strings/issue1684-regex.smt2 b/test/regress/regress1/strings/issue1684-regex.smt2

new file mode 100644 (file)

index 0000000..35eb845
--- /dev/null
+++ b/test/regress/regress1/strings/issue1684-regex.smt2
@@ -0,0 +1,8 @@
+(set-info :smt-lib-version 2.5)
+(set-logic QF_S)
+(set-info :status sat)
+(set-option :strings-exp true)
+(declare-const s String)
+(assert (str.in.re s (re.* (re.range "\x00" "\xFF"))))
+(assert (str.in.re s (re.range "\x00" "\xFF")))
+(check-sat)
+\ No newline at end of file
diff --git a/test/regress/regress1/strings/non_termination_regular_expression4.smt2 b/test/regress/regress1/strings/non_termination_regular_expression4.smt2

new file mode 100644 (file)

index 0000000..0b52734
--- /dev/null
+++ b/test/regress/regress1/strings/non_termination_regular_expression4.smt2
@@ -0,0 +1,36 @@
+(set-info :smt-lib-version 2.5)
+(set-logic ALL)
+(set-info :status unsat)
+(set-option :strings-exp true)
+(set-option :re-elim true)
+(declare-const actionName String)
+(declare-const actionNamespace String)
+
+; Action: p0.0
+(declare-const p0.0.action Bool)
+(assert (= p0.0.action (and (= actionNamespace "foobar") (str.in.re actionName (re.++ (str.to.re "foo") (re.* re.allchar) (re.++ (str.to.re "") re.allchar (str.to.re "bar")))))))
+
+; Policy: 0
+(declare-const p0.denies Bool)
+(assert (not p0.denies))
+(declare-const p0.allows Bool)
+(assert (= p0.allows (and (not p0.denies) p0.0.action)))
+(declare-const p0.neutral Bool)
+(assert (= p0.neutral (and (not p0.allows) (not p0.denies))))
+
+; Action: p1.0
+(declare-const p1.0.action Bool)
+(assert (= p1.0.action (and (= actionNamespace "foobar") (str.in.re actionName (re.++ (re.++ (str.to.re "foo") re.allchar (str.to.re "")) (re.* re.allchar) (str.to.re "bar"))))))
+
+; Policy: 1
+(declare-const p1.denies Bool)
+(assert (not p1.denies))
+(declare-const p1.allows Bool)
+(assert (= p1.allows (and (not p1.denies) p1.0.action)))
+(declare-const p1.neutral Bool)
+(assert (= p1.neutral (and (not p1.allows) (not p1.denies))))
+
+; Goals
+(assert p0.allows)
+(assert (or p1.denies p1.neutral))
+(check-sat)
diff --git a/test/regress/regress1/strings/norn-13.smt2 b/test/regress/regress1/strings/norn-13.smt2

new file mode 100644 (file)

index 0000000..912f447
--- /dev/null
+++ b/test/regress/regress1/strings/norn-13.smt2
@@ -0,0 +1,23 @@
+(set-logic QF_S)
+(set-option :strings-exp true)
+(set-info :status sat)
+
+(declare-fun var_0 () String)
+(declare-fun var_1 () String)
+(declare-fun var_2 () String)
+(declare-fun var_3 () String)
+(declare-fun var_4 () String)
+(declare-fun var_5 () String)
+(declare-fun var_6 () String)
+(declare-fun var_7 () String)
+(declare-fun var_8 () String)
+(declare-fun var_9 () String)
+(declare-fun var_10 () String)
+(declare-fun var_11 () String)
+(declare-fun var_12 () String)
+
+(assert (str.in.re "" (re.* (re.range "a" "u"))))
+(assert (str.in.re var_4 (re.* (re.range "a" "u"))))
+(assert (str.in.re var_4 (re.* (re.union (str.to.re "a") (str.to.re "b")))))
+(assert (not (str.in.re (str.++ var_4 "z" ) (re.* (str.to.re "z")))))
+(check-sat)
diff --git a/test/regress/regress1/strings/policy_variable.smt2 b/test/regress/regress1/strings/policy_variable.smt2

new file mode 100644 (file)

index 0000000..4d14e95
--- /dev/null
+++ b/test/regress/regress1/strings/policy_variable.smt2
@@ -0,0 +1,40 @@
+(set-info :smt-lib-version 2.5)
+(set-logic ALL)
+(set-info :status unsat)
+(set-option :strings-exp true)
+(set-option :re-elim true)
+(declare-const actionName String)
+(declare-const actionNamespace String)
+(declare-const example_key String)
+
+; Action: p0.0
+(declare-const p0.0.action Bool)
+(assert (= p0.0.action (and (= "foobar" actionNamespace) (and (str.prefixof "ab" actionName) (str.contains (str.substr actionName 2 (- (str.len actionName) 3)) "b") (str.suffixof "b" actionName) (not (= actionName "abb")) (not (= actionName "ab"))))))
+
+; Policy: 0
+(declare-const p0.denies Bool)
+(assert (not p0.denies))
+(declare-const p0.allows Bool)
+(assert (= p0.allows (and (not p0.denies) p0.0.action)))
+(declare-const p0.neutral Bool)
+(assert (= p0.neutral (and (not p0.allows) (not p0.denies))))
+
+; Action: p1.0
+(declare-const p1.0.action Bool)
+(assert (= p1.0.action (and (= "foobar" actionNamespace) (and (str.prefixof "a" actionName) (str.prefixof example_key (str.substr actionName 1 (- (str.len actionName) 1))) (str.contains (str.substr actionName (+ 1 (str.len example_key)) (- (str.len actionName) 1 (str.len example_key))) "b") (str.suffixof "b" actionName)))))
+
+(declare-const p1.0.condition Bool)
+(assert (= p1.0.condition (str.prefixof "bb" example_key)))
+
+; Policy: 1
+(declare-const p1.denies Bool)
+(assert (not p1.denies))
+(declare-const p1.allows Bool)
+(assert (= p1.allows (and (not p1.denies) p1.0.action p1.0.condition)))
+(declare-const p1.neutral Bool)
+(assert (= p1.neutral (and (not p1.allows) (not p1.denies))))
+
+; Goals
+(assert (or p0.neutral p0.denies))
+(assert p1.allows)
+(check-sat)
diff --git a/test/regress/regress1/strings/re-elim-exact.smt2 b/test/regress/regress1/strings/re-elim-exact.smt2

new file mode 100644 (file)

index 0000000..62b9346
--- /dev/null
+++ b/test/regress/regress1/strings/re-elim-exact.smt2
@@ -0,0 +1,10 @@
+(set-info :smt-lib-version 2.5)
+(set-logic ALL)
+(set-info :status sat)
+(set-option :strings-exp true)
+(set-option :re-elim true)
+(declare-fun x () String)
+
+(assert (str.in.re x (re.++ re.allchar (str.to.re "A") re.allchar (str.to.re "B") re.allchar)))
+
+(check-sat)
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Thu, 30 Aug 2018 16:57:58 +0000 (11:57 -0500)
committer	GitHub <noreply@github.com>
	Thu, 30 Aug 2018 16:57:58 +0000 (11:57 -0500)
src/Makefile.am		patch \| blob \| history
src/options/strings_options.toml		patch \| blob \| history
src/theory/strings/regexp_elim.cpp	[new file with mode: 0644]	patch \| blob
src/theory/strings/regexp_elim.h	[new file with mode: 0644]	patch \| blob
src/theory/strings/theory_strings.cpp		patch \| blob \| history
src/theory/strings/theory_strings.h		patch \| blob \| history
src/theory/strings/theory_strings_rewriter.cpp		patch \| blob \| history
test/regress/Makefile.tests		patch \| blob \| history
test/regress/regress0/strings/rewrites-re-concat.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/issue1684-regex.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/non_termination_regular_expression4.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/norn-13.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/policy_variable.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/re-elim-exact.smt2	[new file with mode: 0644]	patch \| blob