Inferences and model construction taking into account seq.unit (#4607)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Wed, 1 Jul 2020 13:46:07 +0000 (08:46 -0500)

committer GitHub <noreply@github.com>

Wed, 1 Jul 2020 13:46:07 +0000 (08:46 -0500)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Wed, 1 Jul 2020 13:46:07 +0000 (08:46 -0500)
committer GitHub <noreply@github.com>
Wed, 1 Jul 2020 13:46:07 +0000 (08:46 -0500)
diff --git a/src/theory/strings/base_solver.cpp b/src/theory/strings/base_solver.cpp

index 1af6c89ca7098f8788d7600ce0d4a71f487889b5..952f2669146acefd53f39c6768417f64f9edbc1f 100644 (file)
--- a/src/theory/strings/base_solver.cpp
+++ b/src/theory/strings/base_solver.cpp
@@ -15,6 +15,7 @@
  
  #include "theory/strings/base_solver.h"
  
+#include "expr/sequence.h"
  #include "options/strings_options.h"
  #include "theory/strings/theory_strings_utils.h"
  #include "theory/strings/word.h"
@@ -54,7 +55,7 @@ void BaseSolver::checkInit()
      if (!tn.isRegExp())
      {
        Node emps;
-      if (tn.isString())
+      if (tn.isStringLike())
        {
          d_stringsEqc.push_back(eqc);
          emps = Word::mkEmptyWord(tn);
@@ -64,19 +65,75 @@ void BaseSolver::checkInit()
        while (!eqc_i.isFinished())
        {
          Node n = *eqc_i;
-        if (n.isConst())
+        Kind k = n.getKind();
+        // process constant-like terms
+        if (utils::isConstantLike(n))
          {
-          d_eqcInfo[eqc].d_bestContent = n;
-          d_eqcInfo[eqc].d_base = n;
-          d_eqcInfo[eqc].d_exp = Node::null();
+          Node prev = d_eqcInfo[eqc].d_bestContent;
+          if (!prev.isNull())
+          {
+            // we have either (seq.unit x) = C, or (seq.unit x) = (seq.unit y)
+            // where C is a sequence constant.
+            Node cval =
+                prev.isConst() ? prev : (n.isConst() ? n : Node::null());
+            std::vector<Node> exp;
+            exp.push_back(prev.eqNode(n));
+            Node s, t;
+            if (cval.isNull())
+            {
+              // injectivity of seq.unit
+              s = prev[0];
+              t = n[0];
+            }
+            else
+            {
+              // should not have two constants in the same equivalence class
+              Assert(cval.getType().isSequence());
+              std::vector<Node> cchars = Word::getChars(cval);
+              if (cchars.size() == 1)
+              {
+                Node oval = prev.isConst() ? n : prev;
+                Assert(oval.getKind() == SEQ_UNIT);
+                s = oval[0];
+                t = cchars[0]
+                        .getConst<ExprSequence>()
+                        .getSequence()
+                        .getVec()[0];
+                // oval is congruent (ignored) in this context
+                d_congruent.insert(oval);
+              }
+              else
+              {
+                // (seq.unit x) = C => false if |C| != 1.
+                d_im.sendInference(
+                    exp, d_false, Inference::UNIT_CONST_CONFLICT);
+                return;
+              }
+            }
+            if (!d_state.areEqual(s, t))
+            {
+              // (seq.unit x) = (seq.unit y) => x=y, or
+              // (seq.unit x) = (seq.unit c) => x=c
+              Assert(s.getType() == t.getType());
+              d_im.sendInference(exp, s.eqNode(t), Inference::UNIT_INJ);
+            }
+          }
+          // update best content
+          if (prev.isNull() || n.isConst())
+          {
+            d_eqcInfo[eqc].d_bestContent = n;
+            d_eqcInfo[eqc].d_bestScore = 0;
+            d_eqcInfo[eqc].d_base = n;
+            d_eqcInfo[eqc].d_exp = Node::null();
+          }
          }
-        else if (tn.isInteger())
+        if (tn.isInteger())
          {
            // do nothing
          }
+        // process indexing
          else if (n.getNumChildren() > 0)
          {
-          Kind k = n.getKind();
            if (k != EQUAL)
            {
              if (d_congruent.find(n) == d_congruent.end())
@@ -87,7 +144,7 @@ void BaseSolver::checkInit()
                {
                  // check if we have inferred a new equality by removal of empty
                  // components
-                if (n.getKind() == STRING_CONCAT && !d_state.areEqual(nc, n))
+                if (k == STRING_CONCAT && !d_state.areEqual(nc, n))
                  {
                    std::vector<Node> exp;
                    size_t count[2] = {0, 0};
@@ -188,7 +245,7 @@ void BaseSolver::checkInit()
              }
            }
          }
-        else
+        else if (!n.isConst())
          {
            if (d_congruent.find(n) == d_congruent.end())
            {
@@ -351,6 +408,7 @@ void BaseSolver::checkConstantEquivalenceClasses(TermIndex* ti,
              Node nct = utils::mkNConcat(vecnc, n.getType());
              Assert(!nct.isConst());
              bei.d_bestContent = nct;
+            bei.d_bestScore = contentSize;
              bei.d_base = n;
              if (!exp.empty())
              {
@@ -449,8 +507,26 @@ void BaseSolver::checkCardinalityType(TypeNode tn,
                                        std::vector<std::vector<Node> >& cols,
                                        std::vector<Node>& lts)
  {
+  Trace("strings-card") << "Check cardinality (type " << tn << ")..."
+                        << std::endl;
    NodeManager* nm = NodeManager::currentNM();
-  Trace("strings-card") << "Check cardinality...." << std::endl;
+  uint32_t typeCardSize;
+  if (tn.isString())  // string-only
+  {
+    typeCardSize = d_cardSize;
+  }
+  else
+  {
+    Assert(tn.isSequence());
+    TypeNode etn = tn.getSequenceElementType();
+    if (etn.isInterpretedFinite())
+    {
+      // infinite cardinality, we are fine
+      return;
+    }
+    // TODO (cvc4-projects #23): how to handle sequence for finite types?
+    return;
+  }
    // for each collection
    for (unsigned i = 0, csize = cols.size(); i < csize; ++i)
    {
@@ -462,23 +538,18 @@ void BaseSolver::checkCardinalityType(TypeNode tn,
        // no restriction on sets in the partition of size 1
        continue;
      }
-    if (!tn.isString())
-    {
-      // TODO (cvc4-projects #23): how to handle sequence for finite types?
-      continue;
-    }
      // size > c^k
      unsigned card_need = 1;
      double curr = static_cast<double>(cols[i].size());
-    while (curr > d_cardSize)
+    while (curr > typeCardSize)
      {
-      curr = curr / static_cast<double>(d_cardSize);
+      curr = curr / static_cast<double>(typeCardSize);
        card_need++;
      }
      Trace("strings-card")
          << "Need length " << card_need
-        << " for this number of strings (where alphabet size is " << d_cardSize
-        << ")." << std::endl;
+        << " for this number of strings (where alphabet size is "
+        << typeCardSize << ") given type " << tn << "." << std::endl;
      // check if we need to split
      bool needsSplit = true;
      if (lr.isConst())
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp

index 3bc953f2c2dac5dd16b84c56cf6570f15b0b78c9..a38d1627930e91127d64993cbdc894e030309abc 100644 (file)
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -707,15 +707,17 @@ void CoreSolver::getNormalForms(Node eqc,
    while( !eqc_i.isFinished() ){
      Node n = (*eqc_i);
      if( !d_bsolver.isCongruent(n) ){
-      if (n.isConst() || n.getKind() == STRING_CONCAT)
+      Kind nk = n.getKind();
+      bool isCLike = utils::isConstantLike(n);
+      if (isCLike || nk == STRING_CONCAT)
        {
          Trace("strings-process-debug") << "Get Normal Form : Process term " << n << " in eqc " << eqc << std::endl;
          NormalForm nf_curr;
-        if (n.isConst())
+        if (isCLike)
          {
            nf_curr.init(n);
          }
-        else if (n.getKind() == STRING_CONCAT)
+        else if (nk == STRING_CONCAT)
          {
            // set the base to n, we construct the other portions of nf_curr in
            // the following.
@@ -791,7 +793,8 @@ void CoreSolver::getNormalForms(Node eqc,
          }
          //if not equal to self
          std::vector<Node>& currv = nf_curr.d_nf;
-        if (currv.size() > 1 || (currv.size() == 1 && currv[0].isConst()))
+        if (currv.size() > 1
+            || (currv.size() == 1 && utils::isConstantLike(currv[0])))
          {
            // if in a build with assertions, check that normal form is acyclic
            if (Configuration::isAssertionBuild())
diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp

index 961d976cfb44a6da079a0096a812f7bd0e0c842b..4ee2f4919bc357e70339f32b8451a32f0cdb6bc2 100644 (file)
--- a/src/theory/strings/extf_solver.cpp
+++ b/src/theory/strings/extf_solver.cpp
@@ -65,6 +65,7 @@ ExtfSolver::ExtfSolver(context::Context* c,
    d_extt.addFunctionKind(kind::STRING_TOLOWER);
    d_extt.addFunctionKind(kind::STRING_TOUPPER);
    d_extt.addFunctionKind(kind::STRING_REV);
+  d_extt.addFunctionKind(kind::SEQ_UNIT);
  
    d_true = NodeManager::currentNM()->mkConst(true);
    d_false = NodeManager::currentNM()->mkConst(false);
@@ -138,16 +139,18 @@ bool ExtfSolver::doReduction(int effort, Node n)
        }
      }
    }
-  else
+  else if (k == STRING_SUBSTR)
    {
-    if (k == STRING_SUBSTR)
-    {
-      r_effort = 1;
-    }
-    else if (k != STRING_IN_REGEXP)
-    {
-      r_effort = 2;
-    }
+    r_effort = 1;
+  }
+  else if (k == SEQ_UNIT)
+  {
+    // never necessary to reduce seq.unit
+    return false;
+  }
+  else if (k != STRING_IN_REGEXP)
+  {
+    r_effort = 2;
    }
    if (effort != r_effort)
    {
diff --git a/src/theory/strings/infer_info.cpp b/src/theory/strings/infer_info.cpp

index 9640a57e26c4b83b854f928f25d59e11c4104877..c75e03440b358750246155048609e4c448bb6dc7 100644 (file)
--- a/src/theory/strings/infer_info.cpp
+++ b/src/theory/strings/infer_info.cpp
@@ -26,6 +26,8 @@ const char* toString(Inference i)
      case Inference::I_CONST_MERGE: return "I_CONST_MERGE";
      case Inference::I_CONST_CONFLICT: return "I_CONST_CONFLICT";
      case Inference::I_NORM: return "I_NORM";
+    case Inference::UNIT_INJ: return "UNIT_INJ";
+    case Inference::UNIT_CONST_CONFLICT: return "UNIT_CONST_CONFLICT";
      case Inference::CARD_SP: return "CARD_SP";
      case Inference::CARDINALITY: return "CARDINALITY";
      case Inference::I_CYCLE_E: return "I_CYCLE_E";
diff --git a/src/theory/strings/infer_info.h b/src/theory/strings/infer_info.h

index 1e37dc5b0ec3e536de10011aec3909bd6ce6da73..2a42b9fab3c8887d725a528ca66041c7b447ee6c 100644 (file)
--- a/src/theory/strings/infer_info.h
+++ b/src/theory/strings/infer_info.h
@@ -59,6 +59,10 @@ enum class Inference : uint32_t
    // equal after e.g. removing strings that are currently empty. For example:
    //   y = "" ^ z = "" => x ++ y = z ++ x
    I_NORM,
+  // injectivity of seq.unit
+  UNIT_INJ,
+  // unit constant conflict
+  UNIT_CONST_CONFLICT,
    // A split due to cardinality
    CARD_SP,
    // The cardinality inference for strings, see Liang et al CAV 2014.
diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp

index 09d496f06d629e11b14ed6e3c3ffbb4e3c05258b..68b510de6717138765998b906783a7ab6f82470d 100644 (file)
--- a/src/theory/strings/rewrites.cpp
+++ b/src/theory/strings/rewrites.cpp
@@ -202,6 +202,7 @@ const char* toString(Rewrite r)
      case Rewrite::LEN_CONCAT: return "LEN_CONCAT";
      case Rewrite::LEN_REPL_INV: return "LEN_REPL_INV";
      case Rewrite::LEN_CONV_INV: return "LEN_CONV_INV";
+    case Rewrite::LEN_SEQ_UNIT: return "LEN_SEQ_UNIT";
      case Rewrite::CHARAT_ELIM: return "CHARAT_ELIM";
      case Rewrite::SEQ_UNIT_EVAL: return "SEQ_UNIT_EVAL";
      default: return "?";
diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h

index a2cf5efc8d2b6d64b40859bbef0c8e756052005b..ccbdbc0cdff5efa4632f03c014670e721398eaec 100644 (file)
--- a/src/theory/strings/rewrites.h
+++ b/src/theory/strings/rewrites.h
@@ -205,6 +205,7 @@ enum class Rewrite : uint32_t
    LEN_CONCAT,
    LEN_REPL_INV,
    LEN_CONV_INV,
+  LEN_SEQ_UNIT,
    CHARAT_ELIM,
    SEQ_UNIT_EVAL
  };
diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp

index 20b892d0fa3e4d3c404f3145120d47c01e2ba952..bb0fa1d9737d98a3ed5f353b1b04eac5d3813d5b 100644 (file)
--- a/src/theory/strings/sequences_rewriter.cpp
+++ b/src/theory/strings/sequences_rewriter.cpp
@@ -576,6 +576,11 @@ Node SequencesRewriter::rewriteLength(Node node)
      Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
      return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV);
    }
+  else if (nk0 == SEQ_UNIT)
+  {
+    Node retNode = nm->mkConst(Rational(1));
+    return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT);
+  }
    return node;
  }
  
@@ -1836,6 +1841,7 @@ Node SequencesRewriter::rewriteContains(Node node)
          nb << emp.eqNode(t);
          for (const Node& c : vec)
          {
+          Assert(c.getType() == t.getType());
            nb << c.eqNode(t);
          }
  
@@ -3171,7 +3177,7 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
      Assert(ratLen.getDenominator() == 1);
      Integer intLen = ratLen.getNumerator();
      uint32_t u = intLen.getUnsignedInt();
-    if (stype.isString())
+    if (stype.isString())  // string-only
      {
        res = nm->mkConst(String(std::string(u, 'A')));
      }
@@ -3233,7 +3239,7 @@ Node SequencesRewriter::rewriteSeqUnit(Node node)
    {
      std::vector<Expr> seq;
      seq.push_back(node[0].toExpr());
-    TypeNode stype = nm->mkSequenceType(node[0].getType());
+    TypeNode stype = node[0].getType();
      Node ret = nm->mkConst(ExprSequence(stype.toType(), seq));
      return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL);
    }
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp

index ae8b3b682a7f5916ef30e10ca11b9e536baa19aa..d1b18df133e5aec0d1a768762c35b5c7975a0789 100644 (file)
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -363,9 +363,15 @@ bool TheoryStrings::collectModelInfoType(
          NormalForm& nfe = d_csolver->getNormalForm(eqc);
          if (nfe.d_nf.size() == 1)
          {
+          // is it an equivalence class with a seq.unit term?
+          if (nfe.d_nf[0].getKind() == SEQ_UNIT)
+          {
+            pure_eq_assign[eqc] = nfe.d_nf[0];
+            Trace("strings-model") << "(unit: " << nfe.d_nf[0] << ") ";
+          }
            // does it have a code and the length of these equivalence classes are
            // one?
-          if (d_termReg.hasStringCode() && lts_values[i] == d_one)
+          else if (d_termReg.hasStringCode() && lts_values[i] == d_one)
            {
              EqcInfo* eip = d_state.getOrMakeEqcInfo(eqc, false);
              if (eip && !eip->d_codeTerm.get().isNull())
@@ -419,7 +425,7 @@ bool TheoryStrings::collectModelInfoType(
        std::unique_ptr<SEnumLen> sel;
        Trace("strings-model") << "Cardinality of alphabet is "
                               << utils::getAlphabetCardinality() << std::endl;
-      if (tn.isString())
+      if (tn.isString())  // string-only
        {
          sel.reset(new StringEnumLen(
              currLen, currLen, utils::getAlphabetCardinality()));
@@ -535,7 +541,6 @@ bool TheoryStrings::collectModelInfoType(
          nc.push_back(r.isConst() ? r : processed[r]);
        }
        Node cc = utils::mkNConcat(nc, tn);
-      Assert(cc.isConst());
        Trace("strings-model")
            << "*** Determined constant " << cc << " for " << rn << std::endl;
        processed[rn] = cc;
@@ -543,12 +548,17 @@ bool TheoryStrings::collectModelInfoType(
        {
          // this should never happen due to the model soundness argument
          // for strings
-
          Unreachable() << "TheoryStrings::collectModelInfoType: "
                           "Inconsistent equality (unprocessed eqc)"
                        << std::endl;
          return false;
        }
+      else if (!cc.isConst())
+      {
+        // the value may be specified by seq.unit components, ensure this
+        // is marked as the skeleton for constructing values in this class.
+        m->assertSkeleton(cc);
+      }
      }
    }
    //Trace("strings-model") << "String Model : Assigned." << std::endl;
diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp

index f7768430999203a58b59385cc06cafb2c8b665f6..3cf14feadeba65d0f152607fd2b01a4cc3874cc6 100644 (file)
--- a/src/theory/strings/theory_strings_utils.cpp
+++ b/src/theory/strings/theory_strings_utils.cpp
@@ -274,6 +274,8 @@ std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x)
        allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
  }
  
+bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; }
+
  bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
  {
    size_t i = start;
diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h

index e0480f32b45553438164ccc668b0ca8053094a37..803a5ffea103dda2df6a4583196088f0d5e3d68f 100644 (file)
--- a/src/theory/strings/theory_strings_utils.h
+++ b/src/theory/strings/theory_strings_utils.h
@@ -147,6 +147,15 @@ Node mkSubstrChain(Node base,
   */
  std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x);
  
+/**
+ * Return if a string-like term n is "constant-like", that is, either a
+ * constant string/sequence, or an application of seq.unit.
+ *
+ * @param n The string-like term
+ * @return true if n is constant-like.
+ */
+bool isConstantLike(Node n);
+
  /**
   * Given a vector of regular expression nodes and a start index that points to
   * a wildcard, returns true if the wildcard is unbounded (i.e. it is followed
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Wed, 1 Jul 2020 13:46:07 +0000 (08:46 -0500)
committer	GitHub <noreply@github.com>
	Wed, 1 Jul 2020 13:46:07 +0000 (08:46 -0500)
src/theory/strings/base_solver.cpp		patch \| blob \| history
src/theory/strings/core_solver.cpp		patch \| blob \| history
src/theory/strings/extf_solver.cpp		patch \| blob \| history
src/theory/strings/infer_info.cpp		patch \| blob \| history
src/theory/strings/infer_info.h		patch \| blob \| history
src/theory/strings/rewrites.cpp		patch \| blob \| history
src/theory/strings/rewrites.h		patch \| blob \| history
src/theory/strings/sequences_rewriter.cpp		patch \| blob \| history
src/theory/strings/theory_strings.cpp		patch \| blob \| history
src/theory/strings/theory_strings_utils.cpp		patch \| blob \| history
src/theory/strings/theory_strings_utils.h		patch \| blob \| history