Inferences and model construction taking into account seq.unit (#4607)
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>
Wed, 1 Jul 2020 13:46:07 +0000 (08:46 -0500)
committerGitHub <noreply@github.com>
Wed, 1 Jul 2020 13:46:07 +0000 (08:46 -0500)
Towards theory of sequences.

This makes the strings solver handle seq.unit, which requires two new inferences and updates to its model construction.

It also fixes a bug in the best content heuristic, which previously failed to update the best score.

src/theory/strings/base_solver.cpp
src/theory/strings/core_solver.cpp
src/theory/strings/extf_solver.cpp
src/theory/strings/infer_info.cpp
src/theory/strings/infer_info.h
src/theory/strings/rewrites.cpp
src/theory/strings/rewrites.h
src/theory/strings/sequences_rewriter.cpp
src/theory/strings/theory_strings.cpp
src/theory/strings/theory_strings_utils.cpp
src/theory/strings/theory_strings_utils.h

index 1af6c89ca7098f8788d7600ce0d4a71f487889b5..952f2669146acefd53f39c6768417f64f9edbc1f 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "theory/strings/base_solver.h"
 
+#include "expr/sequence.h"
 #include "options/strings_options.h"
 #include "theory/strings/theory_strings_utils.h"
 #include "theory/strings/word.h"
@@ -54,7 +55,7 @@ void BaseSolver::checkInit()
     if (!tn.isRegExp())
     {
       Node emps;
-      if (tn.isString())
+      if (tn.isStringLike())
       {
         d_stringsEqc.push_back(eqc);
         emps = Word::mkEmptyWord(tn);
@@ -64,19 +65,75 @@ void BaseSolver::checkInit()
       while (!eqc_i.isFinished())
       {
         Node n = *eqc_i;
-        if (n.isConst())
+        Kind k = n.getKind();
+        // process constant-like terms
+        if (utils::isConstantLike(n))
         {
-          d_eqcInfo[eqc].d_bestContent = n;
-          d_eqcInfo[eqc].d_base = n;
-          d_eqcInfo[eqc].d_exp = Node::null();
+          Node prev = d_eqcInfo[eqc].d_bestContent;
+          if (!prev.isNull())
+          {
+            // we have either (seq.unit x) = C, or (seq.unit x) = (seq.unit y)
+            // where C is a sequence constant.
+            Node cval =
+                prev.isConst() ? prev : (n.isConst() ? n : Node::null());
+            std::vector<Node> exp;
+            exp.push_back(prev.eqNode(n));
+            Node s, t;
+            if (cval.isNull())
+            {
+              // injectivity of seq.unit
+              s = prev[0];
+              t = n[0];
+            }
+            else
+            {
+              // should not have two constants in the same equivalence class
+              Assert(cval.getType().isSequence());
+              std::vector<Node> cchars = Word::getChars(cval);
+              if (cchars.size() == 1)
+              {
+                Node oval = prev.isConst() ? n : prev;
+                Assert(oval.getKind() == SEQ_UNIT);
+                s = oval[0];
+                t = cchars[0]
+                        .getConst<ExprSequence>()
+                        .getSequence()
+                        .getVec()[0];
+                // oval is congruent (ignored) in this context
+                d_congruent.insert(oval);
+              }
+              else
+              {
+                // (seq.unit x) = C => false if |C| != 1.
+                d_im.sendInference(
+                    exp, d_false, Inference::UNIT_CONST_CONFLICT);
+                return;
+              }
+            }
+            if (!d_state.areEqual(s, t))
+            {
+              // (seq.unit x) = (seq.unit y) => x=y, or
+              // (seq.unit x) = (seq.unit c) => x=c
+              Assert(s.getType() == t.getType());
+              d_im.sendInference(exp, s.eqNode(t), Inference::UNIT_INJ);
+            }
+          }
+          // update best content
+          if (prev.isNull() || n.isConst())
+          {
+            d_eqcInfo[eqc].d_bestContent = n;
+            d_eqcInfo[eqc].d_bestScore = 0;
+            d_eqcInfo[eqc].d_base = n;
+            d_eqcInfo[eqc].d_exp = Node::null();
+          }
         }
-        else if (tn.isInteger())
+        if (tn.isInteger())
         {
           // do nothing
         }
+        // process indexing
         else if (n.getNumChildren() > 0)
         {
-          Kind k = n.getKind();
           if (k != EQUAL)
           {
             if (d_congruent.find(n) == d_congruent.end())
@@ -87,7 +144,7 @@ void BaseSolver::checkInit()
               {
                 // check if we have inferred a new equality by removal of empty
                 // components
-                if (n.getKind() == STRING_CONCAT && !d_state.areEqual(nc, n))
+                if (k == STRING_CONCAT && !d_state.areEqual(nc, n))
                 {
                   std::vector<Node> exp;
                   size_t count[2] = {0, 0};
@@ -188,7 +245,7 @@ void BaseSolver::checkInit()
             }
           }
         }
-        else
+        else if (!n.isConst())
         {
           if (d_congruent.find(n) == d_congruent.end())
           {
@@ -351,6 +408,7 @@ void BaseSolver::checkConstantEquivalenceClasses(TermIndex* ti,
             Node nct = utils::mkNConcat(vecnc, n.getType());
             Assert(!nct.isConst());
             bei.d_bestContent = nct;
+            bei.d_bestScore = contentSize;
             bei.d_base = n;
             if (!exp.empty())
             {
@@ -449,8 +507,26 @@ void BaseSolver::checkCardinalityType(TypeNode tn,
                                       std::vector<std::vector<Node> >& cols,
                                       std::vector<Node>& lts)
 {
+  Trace("strings-card") << "Check cardinality (type " << tn << ")..."
+                        << std::endl;
   NodeManager* nm = NodeManager::currentNM();
-  Trace("strings-card") << "Check cardinality...." << std::endl;
+  uint32_t typeCardSize;
+  if (tn.isString())  // string-only
+  {
+    typeCardSize = d_cardSize;
+  }
+  else
+  {
+    Assert(tn.isSequence());
+    TypeNode etn = tn.getSequenceElementType();
+    if (etn.isInterpretedFinite())
+    {
+      // infinite cardinality, we are fine
+      return;
+    }
+    // TODO (cvc4-projects #23): how to handle sequence for finite types?
+    return;
+  }
   // for each collection
   for (unsigned i = 0, csize = cols.size(); i < csize; ++i)
   {
@@ -462,23 +538,18 @@ void BaseSolver::checkCardinalityType(TypeNode tn,
       // no restriction on sets in the partition of size 1
       continue;
     }
-    if (!tn.isString())
-    {
-      // TODO (cvc4-projects #23): how to handle sequence for finite types?
-      continue;
-    }
     // size > c^k
     unsigned card_need = 1;
     double curr = static_cast<double>(cols[i].size());
-    while (curr > d_cardSize)
+    while (curr > typeCardSize)
     {
-      curr = curr / static_cast<double>(d_cardSize);
+      curr = curr / static_cast<double>(typeCardSize);
       card_need++;
     }
     Trace("strings-card")
         << "Need length " << card_need
-        << " for this number of strings (where alphabet size is " << d_cardSize
-        << ")." << std::endl;
+        << " for this number of strings (where alphabet size is "
+        << typeCardSize << ") given type " << tn << "." << std::endl;
     // check if we need to split
     bool needsSplit = true;
     if (lr.isConst())
index 3bc953f2c2dac5dd16b84c56cf6570f15b0b78c9..a38d1627930e91127d64993cbdc894e030309abc 100644 (file)
@@ -707,15 +707,17 @@ void CoreSolver::getNormalForms(Node eqc,
   while( !eqc_i.isFinished() ){
     Node n = (*eqc_i);
     if( !d_bsolver.isCongruent(n) ){
-      if (n.isConst() || n.getKind() == STRING_CONCAT)
+      Kind nk = n.getKind();
+      bool isCLike = utils::isConstantLike(n);
+      if (isCLike || nk == STRING_CONCAT)
       {
         Trace("strings-process-debug") << "Get Normal Form : Process term " << n << " in eqc " << eqc << std::endl;
         NormalForm nf_curr;
-        if (n.isConst())
+        if (isCLike)
         {
           nf_curr.init(n);
         }
-        else if (n.getKind() == STRING_CONCAT)
+        else if (nk == STRING_CONCAT)
         {
           // set the base to n, we construct the other portions of nf_curr in
           // the following.
@@ -791,7 +793,8 @@ void CoreSolver::getNormalForms(Node eqc,
         }
         //if not equal to self
         std::vector<Node>& currv = nf_curr.d_nf;
-        if (currv.size() > 1 || (currv.size() == 1 && currv[0].isConst()))
+        if (currv.size() > 1
+            || (currv.size() == 1 && utils::isConstantLike(currv[0])))
         {
           // if in a build with assertions, check that normal form is acyclic
           if (Configuration::isAssertionBuild())
index 961d976cfb44a6da079a0096a812f7bd0e0c842b..4ee2f4919bc357e70339f32b8451a32f0cdb6bc2 100644 (file)
@@ -65,6 +65,7 @@ ExtfSolver::ExtfSolver(context::Context* c,
   d_extt.addFunctionKind(kind::STRING_TOLOWER);
   d_extt.addFunctionKind(kind::STRING_TOUPPER);
   d_extt.addFunctionKind(kind::STRING_REV);
+  d_extt.addFunctionKind(kind::SEQ_UNIT);
 
   d_true = NodeManager::currentNM()->mkConst(true);
   d_false = NodeManager::currentNM()->mkConst(false);
@@ -138,16 +139,18 @@ bool ExtfSolver::doReduction(int effort, Node n)
       }
     }
   }
-  else
+  else if (k == STRING_SUBSTR)
   {
-    if (k == STRING_SUBSTR)
-    {
-      r_effort = 1;
-    }
-    else if (k != STRING_IN_REGEXP)
-    {
-      r_effort = 2;
-    }
+    r_effort = 1;
+  }
+  else if (k == SEQ_UNIT)
+  {
+    // never necessary to reduce seq.unit
+    return false;
+  }
+  else if (k != STRING_IN_REGEXP)
+  {
+    r_effort = 2;
   }
   if (effort != r_effort)
   {
index 9640a57e26c4b83b854f928f25d59e11c4104877..c75e03440b358750246155048609e4c448bb6dc7 100644 (file)
@@ -26,6 +26,8 @@ const char* toString(Inference i)
     case Inference::I_CONST_MERGE: return "I_CONST_MERGE";
     case Inference::I_CONST_CONFLICT: return "I_CONST_CONFLICT";
     case Inference::I_NORM: return "I_NORM";
+    case Inference::UNIT_INJ: return "UNIT_INJ";
+    case Inference::UNIT_CONST_CONFLICT: return "UNIT_CONST_CONFLICT";
     case Inference::CARD_SP: return "CARD_SP";
     case Inference::CARDINALITY: return "CARDINALITY";
     case Inference::I_CYCLE_E: return "I_CYCLE_E";
index 1e37dc5b0ec3e536de10011aec3909bd6ce6da73..2a42b9fab3c8887d725a528ca66041c7b447ee6c 100644 (file)
@@ -59,6 +59,10 @@ enum class Inference : uint32_t
   // equal after e.g. removing strings that are currently empty. For example:
   //   y = "" ^ z = "" => x ++ y = z ++ x
   I_NORM,
+  // injectivity of seq.unit
+  UNIT_INJ,
+  // unit constant conflict
+  UNIT_CONST_CONFLICT,
   // A split due to cardinality
   CARD_SP,
   // The cardinality inference for strings, see Liang et al CAV 2014.
index 09d496f06d629e11b14ed6e3c3ffbb4e3c05258b..68b510de6717138765998b906783a7ab6f82470d 100644 (file)
@@ -202,6 +202,7 @@ const char* toString(Rewrite r)
     case Rewrite::LEN_CONCAT: return "LEN_CONCAT";
     case Rewrite::LEN_REPL_INV: return "LEN_REPL_INV";
     case Rewrite::LEN_CONV_INV: return "LEN_CONV_INV";
+    case Rewrite::LEN_SEQ_UNIT: return "LEN_SEQ_UNIT";
     case Rewrite::CHARAT_ELIM: return "CHARAT_ELIM";
     case Rewrite::SEQ_UNIT_EVAL: return "SEQ_UNIT_EVAL";
     default: return "?";
index a2cf5efc8d2b6d64b40859bbef0c8e756052005b..ccbdbc0cdff5efa4632f03c014670e721398eaec 100644 (file)
@@ -205,6 +205,7 @@ enum class Rewrite : uint32_t
   LEN_CONCAT,
   LEN_REPL_INV,
   LEN_CONV_INV,
+  LEN_SEQ_UNIT,
   CHARAT_ELIM,
   SEQ_UNIT_EVAL
 };
index 20b892d0fa3e4d3c404f3145120d47c01e2ba952..bb0fa1d9737d98a3ed5f353b1b04eac5d3813d5b 100644 (file)
@@ -576,6 +576,11 @@ Node SequencesRewriter::rewriteLength(Node node)
     Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
     return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV);
   }
+  else if (nk0 == SEQ_UNIT)
+  {
+    Node retNode = nm->mkConst(Rational(1));
+    return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT);
+  }
   return node;
 }
 
@@ -1836,6 +1841,7 @@ Node SequencesRewriter::rewriteContains(Node node)
         nb << emp.eqNode(t);
         for (const Node& c : vec)
         {
+          Assert(c.getType() == t.getType());
           nb << c.eqNode(t);
         }
 
@@ -3171,7 +3177,7 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
     Assert(ratLen.getDenominator() == 1);
     Integer intLen = ratLen.getNumerator();
     uint32_t u = intLen.getUnsignedInt();
-    if (stype.isString())
+    if (stype.isString())  // string-only
     {
       res = nm->mkConst(String(std::string(u, 'A')));
     }
@@ -3233,7 +3239,7 @@ Node SequencesRewriter::rewriteSeqUnit(Node node)
   {
     std::vector<Expr> seq;
     seq.push_back(node[0].toExpr());
-    TypeNode stype = nm->mkSequenceType(node[0].getType());
+    TypeNode stype = node[0].getType();
     Node ret = nm->mkConst(ExprSequence(stype.toType(), seq));
     return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL);
   }
index ae8b3b682a7f5916ef30e10ca11b9e536baa19aa..d1b18df133e5aec0d1a768762c35b5c7975a0789 100644 (file)
@@ -363,9 +363,15 @@ bool TheoryStrings::collectModelInfoType(
         NormalForm& nfe = d_csolver->getNormalForm(eqc);
         if (nfe.d_nf.size() == 1)
         {
+          // is it an equivalence class with a seq.unit term?
+          if (nfe.d_nf[0].getKind() == SEQ_UNIT)
+          {
+            pure_eq_assign[eqc] = nfe.d_nf[0];
+            Trace("strings-model") << "(unit: " << nfe.d_nf[0] << ") ";
+          }
           // does it have a code and the length of these equivalence classes are
           // one?
-          if (d_termReg.hasStringCode() && lts_values[i] == d_one)
+          else if (d_termReg.hasStringCode() && lts_values[i] == d_one)
           {
             EqcInfo* eip = d_state.getOrMakeEqcInfo(eqc, false);
             if (eip && !eip->d_codeTerm.get().isNull())
@@ -419,7 +425,7 @@ bool TheoryStrings::collectModelInfoType(
       std::unique_ptr<SEnumLen> sel;
       Trace("strings-model") << "Cardinality of alphabet is "
                              << utils::getAlphabetCardinality() << std::endl;
-      if (tn.isString())
+      if (tn.isString())  // string-only
       {
         sel.reset(new StringEnumLen(
             currLen, currLen, utils::getAlphabetCardinality()));
@@ -535,7 +541,6 @@ bool TheoryStrings::collectModelInfoType(
         nc.push_back(r.isConst() ? r : processed[r]);
       }
       Node cc = utils::mkNConcat(nc, tn);
-      Assert(cc.isConst());
       Trace("strings-model")
           << "*** Determined constant " << cc << " for " << rn << std::endl;
       processed[rn] = cc;
@@ -543,12 +548,17 @@ bool TheoryStrings::collectModelInfoType(
       {
         // this should never happen due to the model soundness argument
         // for strings
-
         Unreachable() << "TheoryStrings::collectModelInfoType: "
                          "Inconsistent equality (unprocessed eqc)"
                       << std::endl;
         return false;
       }
+      else if (!cc.isConst())
+      {
+        // the value may be specified by seq.unit components, ensure this
+        // is marked as the skeleton for constructing values in this class.
+        m->assertSkeleton(cc);
+      }
     }
   }
   //Trace("strings-model") << "String Model : Assigned." << std::endl;
index f7768430999203a58b59385cc06cafb2c8b665f6..3cf14feadeba65d0f152607fd2b01a4cc3874cc6 100644 (file)
@@ -274,6 +274,8 @@ std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x)
       allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
 }
 
+bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; }
+
 bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
 {
   size_t i = start;
index e0480f32b45553438164ccc668b0ca8053094a37..803a5ffea103dda2df6a4583196088f0d5e3d68f 100644 (file)
@@ -147,6 +147,15 @@ Node mkSubstrChain(Node base,
  */
 std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x);
 
+/**
+ * Return if a string-like term n is "constant-like", that is, either a
+ * constant string/sequence, or an application of seq.unit.
+ *
+ * @param n The string-like term
+ * @return true if n is constant-like.
+ */
+bool isConstantLike(Node n);
+
 /**
  * Given a vector of regular expression nodes and a start index that points to
  * a wildcard, returns true if the wildcard is unbounded (i.e. it is followed