Better formalization of regular expression unfolding skolems (#6602)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Mon, 24 May 2021 14:39:56 +0000 (09:39 -0500)

committer GitHub <noreply@github.com>

Mon, 24 May 2021 14:39:56 +0000 (09:39 -0500)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Mon, 24 May 2021 14:39:56 +0000 (09:39 -0500)
committer GitHub <noreply@github.com>
Mon, 24 May 2021 14:39:56 +0000 (09:39 -0500)
diff --git a/src/expr/skolem_manager.cpp b/src/expr/skolem_manager.cpp

index 4004bf6fef6e4962baefc79064965c40212db7af..8e1c5e54c46e434ffdf881c6ea15d924cc1c70c4 100644 (file)
--- a/src/expr/skolem_manager.cpp
+++ b/src/expr/skolem_manager.cpp
@@ -53,6 +53,7 @@ const char* toString(SkolemFunId id)
      case SkolemFunId::SELECTOR_WRONG: return "SELECTOR_WRONG";
      case SkolemFunId::SHARED_SELECTOR: return "SHARED_SELECTOR";
      case SkolemFunId::SEQ_NTH_OOB: return "SEQ_NTH_OOB";
+    case SkolemFunId::RE_UNFOLD_POS_COMPONENT: return "RE_UNFOLD_POS_COMPONENT";
      default: return "?";
    }
  }
@@ -206,11 +207,37 @@ Node SkolemManager::mkSkolemFunction(SkolemFunId id,
      ss << "SKOLEM_FUN_" << id;
      Node k = nm->mkSkolem(ss.str(), tn, "an internal skolem function", flags);
      d_skolemFuns[key] = k;
+    d_skolemFunMap[k] = key;
      return k;
    }
    return it->second;
  }
  
+Node SkolemManager::mkSkolemFunction(SkolemFunId id,
+                                     TypeNode tn,
+                                     const std::vector<Node>& cacheVals,
+                                     int flags)
+{
+  Assert(cacheVals.size() > 1);
+  Node cacheVal = NodeManager::currentNM()->mkNode(SEXPR, cacheVals);
+  return mkSkolemFunction(id, tn, cacheVal, flags);
+}
+
+bool SkolemManager::isSkolemFunction(Node k,
+                                     SkolemFunId& id,
+                                     Node& cacheVal) const
+{
+  std::map<Node, std::tuple<SkolemFunId, TypeNode, Node>>::const_iterator it =
+      d_skolemFunMap.find(k);
+  if (it == d_skolemFunMap.end())
+  {
+    return false;
+  }
+  id = std::get<0>(it->second);
+  cacheVal = std::get<2>(it->second);
+  return true;
+}
+
  Node SkolemManager::mkDummySkolem(const std::string& prefix,
                                    const TypeNode& type,
                                    const std::string& comment,
diff --git a/src/expr/skolem_manager.h b/src/expr/skolem_manager.h

index d6215394167e1f619f645e50ba03f21c3872f12c..a7d35d1551b2d9e37b876c7c82b790cc129c12d5 100644 (file)
--- a/src/expr/skolem_manager.h
+++ b/src/expr/skolem_manager.h
@@ -29,6 +29,7 @@ class ProofGenerator;
  /** Skolem function identifier */
  enum class SkolemFunId
  {
+  NONE,
    /** an uninterpreted function f s.t. f(x) = x / 0.0 (real division) */
    DIV_BY_ZERO,
    /** an uninterpreted function f s.t. f(x) = x / 0 (integer division) */
@@ -43,6 +44,13 @@ enum class SkolemFunId
    SHARED_SELECTOR,
    /** an application of seq.nth that is out of bounds */
    SEQ_NTH_OOB,
+  /**
+   * Regular expression unfold component: if (str.in_re t R), where R is
+   * (re.++ r0 ... rn), then the RE_UNFOLD_POS_COMPONENT{t,R,i} is a string
+   * skolem ki such that t = (str.++ k0 ... kn) and (str.in_re k0 r0) for
+   * i = 0, ..., n.
+   */
+  RE_UNFOLD_POS_COMPONENT,
  };
  /** Converts a skolem function name to a string. */
  const char* toString(SkolemFunId id);
@@ -235,6 +243,16 @@ class SkolemManager
                          TypeNode tn,
                          Node cacheVal = Node::null(),
                          int flags = NodeManager::SKOLEM_DEFAULT);
+  /** Same as above, with multiple cache values */
+  Node mkSkolemFunction(SkolemFunId id,
+                        TypeNode tn,
+                        const std::vector<Node>& cacheVals,
+                        int flags = NodeManager::SKOLEM_DEFAULT);
+  /**
+   * Is k a skolem function? Returns true if k was generated by the above call.
+   * Updates the arguments to the values used when constructing it.
+   */
+  bool isSkolemFunction(Node k, SkolemFunId& id, Node& cacheVal) const;
    /**
     * Create a skolem constant with the given name, type, and comment. This
     * should only be used if the definition of the skolem does not matter.
@@ -281,10 +299,10 @@ class SkolemManager
    static Node getOriginalForm(Node n);
  
   private:
-  /**
-   * Cached of skolem functions for mkSkolemFunction above.
-   */
+  /** Cache of skolem functions for mkSkolemFunction above. */
    std::map<std::tuple<SkolemFunId, TypeNode, Node>, Node> d_skolemFuns;
+  /** Backwards mapping of above */
+  std::map<Node, std::tuple<SkolemFunId, TypeNode, Node>> d_skolemFunMap;
    /**
     * Mapping from witness terms to proof generators.
     */
diff --git a/src/theory/strings/infer_proof_cons.cpp b/src/theory/strings/infer_proof_cons.cpp

index 8ee29f71230d865ab08e456ffc2dde16ecadebf6..29a4187ec0317af35943df96e56c96dee4ad07e8 100644 (file)
--- a/src/theory/strings/infer_proof_cons.cpp
+++ b/src/theory/strings/infer_proof_cons.cpp
@@ -712,11 +712,11 @@ void InferProofCons::convert(InferenceId infer,
            }
            else if (eunf.getKind() == AND)
            {
-            // equality is the last conjunct
+            // equality is the first conjunct
              std::vector<Node> childrenAE;
              childrenAE.push_back(eunf);
              std::vector<Node> argsAE;
-            argsAE.push_back(nm->mkConst(Rational(eunf.getNumChildren() - 1)));
+            argsAE.push_back(nm->mkConst(Rational(0)));
              Node eunfAE = psb.tryStep(PfRule::AND_ELIM, childrenAE, argsAE);
              Trace("strings-ipc-prefix")
                  << "--- and elim to " << eunfAE << std::endl;
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp

index bf4c20f854be5a4cab2aaba445a9fe6a630dde35..96351cda9d1772d8283aa3eac9b8e913a015c09c 100644 (file)
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -1017,16 +1017,7 @@ Node RegExpOpr::reduceRegExpPos(Node mem,
    {
      std::vector<Node> nvec;
      std::vector<Node> cc;
-    // get the (valid) existential for this membership
-    Node eform = getExistsForRegExpConcatMem(mem);
      SkolemManager* sm = nm->getSkolemManager();
-    // Notice that this rule does not introduce witness terms, instead it
-    // uses skolems in the conclusion of the proof rule directly. Thus,
-    // the existential eform does not need to be explicitly justified by a
-    // proof here, since it is only being used as an intermediate formula in
-    // this inference. Hence we do not pass a proof generator to mkSkolemize.
-    sm->mkSkolemize(eform, newSkolems, "rc", "regexp concat skolem");
-    Assert(newSkolems.size() == r.getNumChildren());
      // Look up skolems for each of the components. If sc has optimizations
      // enabled, this will return arguments of str.to_re.
      for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i)
@@ -1034,17 +1025,22 @@ Node RegExpOpr::reduceRegExpPos(Node mem,
        if (r[i].getKind() == STRING_TO_REGEXP)
        {
          // optimization, just take the body
-        newSkolems[i] = r[i][0];
+        newSkolems.push_back(r[i][0]);
        }
        else
        {
+        Node ivalue = nm->mkConst(Rational(i));
+        Node sk = sm->mkSkolemFunction(SkolemFunId::RE_UNFOLD_POS_COMPONENT,
+                                       s.getType(),
+                                       {mem[0], mem[1], ivalue});
+        newSkolems.push_back(sk);
          nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i]));
        }
      }
-    // (str.in_re x (re.++ R1 .... Rn)) =>
-    // (and (str.in_re k1 R1) ... (str.in_re kn Rn) (= x (str.++ k1 ... kn)))
+    // (str.in_re x (re.++ R0 .... Rn)) =>
+    // (and (= x (str.++ k0 ... kn)) (str.in_re k0 R0) ... (str.in_re kn Rn) )
      Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems));
-    nvec.push_back(lem);
+    nvec.insert(nvec.begin(), lem);
      conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
    }
    else if (k == REGEXP_STAR)
@@ -1574,50 +1570,6 @@ bool RegExpOpr::regExpIncludes(Node r1, Node r2)
    return result;
  }
  
-/**
- * Associating formulas with their "exists form", or an existentially
- * quantified formula that is equivalent to it. This is currently used
- * for regular expression memberships in the method below.
- */
-struct ExistsFormAttributeId
-{
-};
-typedef expr::Attribute<ExistsFormAttributeId, Node> ExistsFormAttribute;
-
-Node RegExpOpr::getExistsForRegExpConcatMem(Node mem)
-{
-  // get or make the exists form of the membership
-  ExistsFormAttribute efa;
-  if (mem.hasAttribute(efa))
-  {
-    // already computed
-    return mem.getAttribute(efa);
-  }
-  Assert(mem.getKind() == STRING_IN_REGEXP);
-  Node x = mem[0];
-  Node r = mem[1];
-  Assert(r.getKind() == REGEXP_CONCAT);
-  NodeManager* nm = NodeManager::currentNM();
-  TypeNode xtn = x.getType();
-  std::vector<Node> vars;
-  std::vector<Node> mems;
-  for (const Node& rc : r)
-  {
-    Node v = nm->mkBoundVar(xtn);
-    vars.push_back(v);
-    mems.push_back(nm->mkNode(STRING_IN_REGEXP, v, rc));
-  }
-  Node sconcat = nm->mkNode(STRING_CONCAT, vars);
-  Node eq = x.eqNode(sconcat);
-  mems.insert(mems.begin(), eq);
-  Node bvl = nm->mkNode(BOUND_VAR_LIST, vars);
-  Node ebody = nm->mkNode(AND, mems);
-  Node eform = nm->mkNode(EXISTS, bvl, ebody);
-  mem.setAttribute(efa, eform);
-  Trace("regexp-opr") << "Exists form " << mem << " : " << eform << std::endl;
-  return eform;
-}
-
  }  // namespace strings
  }  // namespace theory
  }  // namespace cvc5
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h

index a20e9a0a947b059b3ec33644d7ecfefd4e6682cb..04b5a999d345b01719a847ef309577182c130fe1 100644 (file)
--- a/src/theory/strings/regexp_operation.h
+++ b/src/theory/strings/regexp_operation.h
@@ -190,18 +190,6 @@ class RegExpOpr {
    bool regExpIncludes(Node r1, Node r2);
  
   private:
-  /**
-   * Given a regular expression membership of the form:
-   *   (str.in_re x (re.++ R1 ... Rn))
-   * This returns the valid existentially quantified formula:
-   *   (exists ((x1 String) ... (xn String))
-   *      (=> (str.in_re x (re.++ R1 ... Rn))
-   *      (and (= x (str.++ x1 ... xn))
-   *           (str.in_re x1 R1) ... (str.in_re xn Rn))))
-   * Moreover, this formula is cached per regular expression membership via
-   * an attribute, meaning it is always the same for a given membership mem.
-   */
-  static Node getExistsForRegExpConcatMem(Node mem);
    /** pointer to the skolem cache used by this class */
    SkolemCache* d_sc;
  };
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Mon, 24 May 2021 14:39:56 +0000 (09:39 -0500)
committer	GitHub <noreply@github.com>
	Mon, 24 May 2021 14:39:56 +0000 (09:39 -0500)
src/expr/skolem_manager.cpp		patch \| blob \| history
src/expr/skolem_manager.h		patch \| blob \| history
src/theory/strings/infer_proof_cons.cpp		patch \| blob \| history
src/theory/strings/regexp_operation.cpp		patch \| blob \| history
src/theory/strings/regexp_operation.h		patch \| blob \| history