From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Wed, 6 Nov 2019 23:12:29 +0000 (-0600)
Subject: Move more string utility functions (#3398)
X-Git-Tag: cvc5-1.0.0~3850
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=cbd86eb4ed8bafc17f28244b746a376a019462f1;p=cvc5.git

Move more string utility functions (#3398)

This is work towards splitting a "core solver" object from TheoryStrings.

This moves global functions from TheoryStrings to InferenceManager/SolverState, making them accessible in the future by modules that have references to these objects.

It also corrects an issue where we were maintaining two `d_conflict` fields.
---

diff --git a/src/theory/strings/infer_info.h b/src/theory/strings/infer_info.h
index 745a499d3..0f0329e61 100644
--- a/src/theory/strings/infer_info.h
+++ b/src/theory/strings/infer_info.h
@@ -87,6 +87,9 @@ std::ostream& operator<<(std::ostream& out, Inference i);
  */
 enum LengthStatus
 {
+  // The length of the Skolem should not be constrained. This should be
+  // used for Skolems whose length is already implied.
+  LENGTH_IGNORE,
   // The length of the Skolem is not specified, and should be split on.
   LENGTH_SPLIT,
   // The length of the Skolem is exactly one.
diff --git a/src/theory/strings/inference_manager.cpp b/src/theory/strings/inference_manager.cpp
index 5c08fdee3..c9a2bcd70 100644
--- a/src/theory/strings/inference_manager.cpp
+++ b/src/theory/strings/inference_manager.cpp
@@ -34,10 +34,14 @@ InferenceManager::InferenceManager(TheoryStrings& p,
                                    context::UserContext* u,
                                    SolverState& s,
                                    OutputChannel& out)
-    : d_parent(p), d_state(s), d_out(out), d_keep(c)
+    : d_parent(p), d_state(s), d_out(out), d_keep(c), d_lengthLemmaTermsCache(u)
 {
-  d_true = NodeManager::currentNM()->mkConst(true);
-  d_false = NodeManager::currentNM()->mkConst(false);
+  NodeManager* nm = NodeManager::currentNM();
+  d_zero = nm->mkConst(Rational(0));
+  d_one = nm->mkConst(Rational(1));
+  d_emptyString = nm->mkConst(::CVC4::String(""));
+  d_true = nm->mkConst(true);
+  d_false = nm->mkConst(false);
 }
 
 bool InferenceManager::sendInternalInference(std::vector<Node>& exp,
@@ -131,7 +135,7 @@ void InferenceManager::sendInference(const std::vector<Node>& exp,
     Node eq_exp;
     if (options::stringRExplainLemmas())
     {
-      eq_exp = d_parent.mkExplain(exp, exp_n);
+      eq_exp = mkExplain(exp, exp_n);
     }
     else
     {
@@ -279,6 +283,95 @@ void InferenceManager::sendPhaseRequirement(Node lit, bool pol)
   d_pendingReqPhase[lit] = pol;
 }
 
+void InferenceManager::registerLength(Node n, LengthStatus s)
+{
+  if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
+  {
+    return;
+  }
+  d_lengthLemmaTermsCache.insert(n);
+
+  if (s == LENGTH_IGNORE)
+  {
+    // ignore it
+    return;
+  }
+
+  NodeManager* nm = NodeManager::currentNM();
+  Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
+
+  if (s == LENGTH_GEQ_ONE)
+  {
+    Node neq_empty = n.eqNode(d_emptyString).negate();
+    Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
+    Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
+    Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
+                           << std::endl;
+    Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
+    d_out.lemma(len_geq_one);
+    return;
+  }
+
+  if (s == LENGTH_ONE)
+  {
+    Node len_one = n_len.eqNode(d_one);
+    Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
+                           << std::endl;
+    Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
+    d_out.lemma(len_one);
+    return;
+  }
+  Assert(s == LENGTH_SPLIT);
+
+  if (options::stringSplitEmp() || !options::stringLenGeqZ())
+  {
+    Node n_len_eq_z = n_len.eqNode(d_zero);
+    Node n_len_eq_z_2 = n.eqNode(d_emptyString);
+    Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
+    case_empty = Rewriter::rewrite(case_empty);
+    Node case_nempty = nm->mkNode(GT, n_len, d_zero);
+    if (!case_empty.isConst())
+    {
+      Node lem = nm->mkNode(OR, case_empty, case_nempty);
+      d_out.lemma(lem);
+      Trace("strings-lemma")
+          << "Strings::Lemma LENGTH >= 0 : " << lem << std::endl;
+      // prefer trying the empty case first
+      // notice that requirePhase must only be called on rewritten literals that
+      // occur in the CNF stream.
+      n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
+      Assert(!n_len_eq_z.isConst());
+      d_out.requirePhase(n_len_eq_z, true);
+      n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
+      Assert(!n_len_eq_z_2.isConst());
+      d_out.requirePhase(n_len_eq_z_2, true);
+    }
+    else if (!case_empty.getConst<bool>())
+    {
+      // the rewriter knows that n is non-empty
+      Trace("strings-lemma")
+          << "Strings::Lemma LENGTH > 0 (non-empty): " << case_nempty
+          << std::endl;
+      d_out.lemma(case_nempty);
+    }
+    else
+    {
+      // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
+      // n ---> "". Since this method is only called on non-constants n, it must
+      // be that n = "" ^ len( n ) = 0 does not rewrite to true.
+      Assert(false);
+    }
+  }
+
+  // additionally add len( x ) >= 0 ?
+  if (options::stringLenGeqZ())
+  {
+    Node n_len_geq = nm->mkNode(kind::GEQ, n_len, d_zero);
+    n_len_geq = Rewriter::rewrite(n_len_geq);
+    d_out.lemma(n_len_geq);
+  }
+}
+
 void InferenceManager::addToExplanation(Node a,
                                         Node b,
                                         std::vector<Node>& exp) const
@@ -435,6 +528,104 @@ void InferenceManager::inferSubstitutionProxyVars(
   }
 }
 
+Node InferenceManager::mkExplain(const std::vector<Node>& a) const
+{
+  std::vector<Node> an;
+  return mkExplain(a, an);
+}
+
+Node InferenceManager::mkExplain(const std::vector<Node>& a,
+                                 const std::vector<Node>& an) const
+{
+  std::vector<TNode> antec_exp;
+  // copy to processing vector
+  std::vector<Node> aconj;
+  for (const Node& ac : a)
+  {
+    utils::flattenOp(AND, ac, aconj);
+  }
+  eq::EqualityEngine* ee = d_state.getEqualityEngine();
+  for (const Node& apc : aconj)
+  {
+    Assert(apc.getKind() != AND);
+    Debug("strings-explain") << "Add to explanation " << apc << std::endl;
+    if (apc.getKind() == NOT && apc[0].getKind() == EQUAL)
+    {
+      Assert(ee->hasTerm(apc[0][0]));
+      Assert(ee->hasTerm(apc[0][1]));
+      // ensure that we are ready to explain the disequality
+      AlwaysAssert(ee->areDisequal(apc[0][0], apc[0][1], true));
+    }
+    Assert(apc.getKind() != EQUAL || ee->areEqual(apc[0], apc[1]));
+    // now, explain
+    explain(apc, antec_exp);
+  }
+  for (const Node& anc : an)
+  {
+    if (std::find(antec_exp.begin(), antec_exp.end(), anc) == antec_exp.end())
+    {
+      Debug("strings-explain")
+          << "Add to explanation (new literal) " << anc << std::endl;
+      antec_exp.push_back(anc);
+    }
+  }
+  Node ant;
+  if (antec_exp.empty())
+  {
+    ant = d_true;
+  }
+  else if (antec_exp.size() == 1)
+  {
+    ant = antec_exp[0];
+  }
+  else
+  {
+    ant = NodeManager::currentNM()->mkNode(kind::AND, antec_exp);
+  }
+  return ant;
+}
+
+void InferenceManager::explain(TNode literal,
+                               std::vector<TNode>& assumptions) const
+{
+  Debug("strings-explain") << "Explain " << literal << " "
+                           << d_state.isInConflict() << std::endl;
+  eq::EqualityEngine* ee = d_state.getEqualityEngine();
+  bool polarity = literal.getKind() != NOT;
+  TNode atom = polarity ? literal : literal[0];
+  std::vector<TNode> tassumptions;
+  if (atom.getKind() == EQUAL)
+  {
+    if (atom[0] != atom[1])
+    {
+      Assert(ee->hasTerm(atom[0]));
+      Assert(ee->hasTerm(atom[1]));
+      ee->explainEquality(atom[0], atom[1], polarity, tassumptions);
+    }
+  }
+  else
+  {
+    ee->explainPredicate(atom, polarity, tassumptions);
+  }
+  for (const TNode a : tassumptions)
+  {
+    if (std::find(assumptions.begin(), assumptions.end(), a)
+        == assumptions.end())
+    {
+      assumptions.push_back(a);
+    }
+  }
+  if (Debug.isOn("strings-explain-debug"))
+  {
+    Debug("strings-explain-debug")
+        << "Explanation for " << literal << " was " << std::endl;
+    for (const TNode a : tassumptions)
+    {
+      Debug("strings-explain-debug") << "   " << a << std::endl;
+    }
+  }
+}
+
 }  // namespace strings
 }  // namespace theory
 }  // namespace CVC4
diff --git a/src/theory/strings/inference_manager.h b/src/theory/strings/inference_manager.h
index 85855fab9..e052889f6 100644
--- a/src/theory/strings/inference_manager.h
+++ b/src/theory/strings/inference_manager.h
@@ -163,6 +163,29 @@ class InferenceManager
    * decided with polarity pol.
    */
   void sendPhaseRequirement(Node lit, bool pol);
+  /** register length
+   *
+   * This method is called on non-constant string terms n. It sends a lemma
+   * on the output channel that ensures that the length n satisfies its assigned
+   * status (given by argument s).
+   *
+   * If the status is LENGTH_ONE, we send the lemma len( n ) = 1.
+   *
+   * If the status is LENGTH_GEQ, we send a lemma n != "" ^ len( n ) > 0.
+   *
+   * If the status is LENGTH_SPLIT, we send a send a lemma of the form:
+   *   ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0
+   * This method also ensures that, when applicable, the left branch is taken
+   * first via calls to requirePhase.
+   *
+   * If the status is LENGTH_IGNORE, then no lemma is sent. This status is used
+   * e.g. when the length of n is already implied by other constraints.
+   *
+   * In contrast to the above functions, it makes immediate calls to the output
+   * channel instead of adding them to pending lists.
+   */
+  void registerLength(Node n, LengthStatus s);
+
   //----------------------------constructing antecedants
   /**
    * Adds equality a = b to the vector exp if a and b are distinct terms. It
@@ -212,6 +235,21 @@ class InferenceManager
   /** Do we have a pending lemma to send on the output channel? */
   bool hasPendingLemma() const { return !d_pendingLem.empty(); }
 
+  /** make explanation
+   *
+   * This returns a node corresponding to the explanation of formulas in a,
+   * interpreted conjunctively. The returned node is a conjunction of literals
+   * that have been asserted to the equality engine.
+   */
+  Node mkExplain(const std::vector<Node>& a) const;
+  /** Same as above, but the new literals an are append to the result */
+  Node mkExplain(const std::vector<Node>& a, const std::vector<Node>& an) const;
+  /**
+   * Explain literal l, add conjuncts to assumptions vector instead of making
+   * the node corresponding to their conjunction.
+   */
+  void explain(TNode literal, std::vector<TNode>& assumptions) const;
+
  private:
   /**
    * Indicates that ant => conc should be sent on the output channel of this
@@ -244,8 +282,11 @@ class InferenceManager
    */
   OutputChannel& d_out;
   /** Common constants */
+  Node d_emptyString;
   Node d_true;
   Node d_false;
+  Node d_zero;
+  Node d_one;
   /** The list of pending literals to assert to the equality engine */
   std::vector<Node> d_pending;
   /** A map from the literals in the above vector to their explanation */
@@ -261,6 +302,8 @@ class InferenceManager
    * SAT-context-dependent.
    */
   NodeSet d_keep;
+  /** List of terms that we have register length for */
+  NodeSet d_lengthLemmaTermsCache;
   /** infer substitution proxy vars
    *
    * This method attempts to (partially) convert the formula n into a
diff --git a/src/theory/strings/solver_state.cpp b/src/theory/strings/solver_state.cpp
index c370558b5..b69c99e8c 100644
--- a/src/theory/strings/solver_state.cpp
+++ b/src/theory/strings/solver_state.cpp
@@ -138,8 +138,14 @@ Node EqcInfo::addEndpointConst(Node t, Node c, bool isSuf)
   return Node::null();
 }
 
-SolverState::SolverState(context::Context* c, eq::EqualityEngine& ee)
-    : d_context(c), d_ee(ee), d_conflict(c, false), d_pendingConflict(c)
+SolverState::SolverState(context::Context* c,
+                         eq::EqualityEngine& ee,
+                         Valuation& v)
+    : d_context(c),
+      d_ee(ee),
+      d_valuation(v),
+      d_conflict(c, false),
+      d_pendingConflict(c)
 {
 }
 SolverState::~SolverState()
@@ -278,6 +284,51 @@ void SolverState::setPendingConflictWhen(Node conf)
 
 Node SolverState::getPendingConflict() const { return d_pendingConflict; }
 
+std::pair<bool, Node> SolverState::entailmentCheck(TheoryOfMode mode, TNode lit)
+{
+  return d_valuation.entailmentCheck(mode, lit);
+}
+
+void SolverState::separateByLength(const std::vector<Node>& n,
+                                   std::vector<std::vector<Node> >& cols,
+                                   std::vector<Node>& lts)
+{
+  unsigned leqc_counter = 0;
+  std::map<Node, unsigned> eqc_to_leqc;
+  std::map<unsigned, Node> leqc_to_eqc;
+  std::map<unsigned, std::vector<Node> > eqc_to_strings;
+  NodeManager* nm = NodeManager::currentNM();
+  for (const Node& eqc : n)
+  {
+    Assert(d_ee.getRepresentative(eqc) == eqc);
+    EqcInfo* ei = getOrMakeEqcInfo(eqc, false);
+    Node lt = ei ? ei->d_lengthTerm : Node::null();
+    if (!lt.isNull())
+    {
+      lt = nm->mkNode(STRING_LENGTH, lt);
+      Node r = d_ee.getRepresentative(lt);
+      if (eqc_to_leqc.find(r) == eqc_to_leqc.end())
+      {
+        eqc_to_leqc[r] = leqc_counter;
+        leqc_to_eqc[leqc_counter] = r;
+        leqc_counter++;
+      }
+      eqc_to_strings[eqc_to_leqc[r]].push_back(eqc);
+    }
+    else
+    {
+      eqc_to_strings[leqc_counter].push_back(eqc);
+      leqc_counter++;
+    }
+  }
+  for (const std::pair<const unsigned, std::vector<Node> >& p : eqc_to_strings)
+  {
+    cols.push_back(std::vector<Node>());
+    cols.back().insert(cols.back().end(), p.second.begin(), p.second.end());
+    lts.push_back(leqc_to_eqc[p.first]);
+  }
+}
+
 }  // namespace strings
 }  // namespace theory
 }  // namespace CVC4
diff --git a/src/theory/strings/solver_state.h b/src/theory/strings/solver_state.h
index f14b4b4af..a2001bb3b 100644
--- a/src/theory/strings/solver_state.h
+++ b/src/theory/strings/solver_state.h
@@ -23,6 +23,7 @@
 #include "context/context.h"
 #include "expr/node.h"
 #include "theory/uf/equality_engine.h"
+#include "theory/valuation.h"
 
 namespace CVC4 {
 namespace theory {
@@ -87,7 +88,7 @@ class EqcInfo
 class SolverState
 {
  public:
-  SolverState(context::Context* c, eq::EqualityEngine& ee);
+  SolverState(context::Context* c, eq::EqualityEngine& ee, Valuation& v);
   ~SolverState();
   //-------------------------------------- equality information
   /**
@@ -166,12 +167,28 @@ class SolverState
    * for some eqc that is currently equal to z.
    */
   void addEndpointsToEqcInfo(Node t, Node concat, Node eqc);
+  /** Entailment check
+   *
+   * This calls entailmentCheck on the Valuation object of theory of strings.
+   */
+  std::pair<bool, Node> entailmentCheck(TheoryOfMode mode, TNode lit);
+  /** Separate by length
+   *
+   * Separate the string representatives in argument n into a partition cols
+   * whose collections have equal length. The i^th vector in cols has length
+   * lts[i] for all elements in col.
+   */
+  void separateByLength(const std::vector<Node>& n,
+                        std::vector<std::vector<Node> >& cols,
+                        std::vector<Node>& lts);
 
  private:
   /** Pointer to the SAT context object used by the theory of strings. */
   context::Context* d_context;
   /** Reference to equality engine of the theory of strings. */
   eq::EqualityEngine& d_ee;
+  /** Reference to the valuation of the theory of strings */
+  Valuation& d_valuation;
   /** Are we in conflict? */
   context::CDO<bool> d_conflict;
   /** The pending conflict if one exists */
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index e34130c3f..9fad39b6b 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -93,13 +93,11 @@ TheoryStrings::TheoryStrings(context::Context* c,
     : Theory(THEORY_STRINGS, c, u, out, valuation, logicInfo),
       d_notify(*this),
       d_equalityEngine(d_notify, c, "theory::strings", true),
-      d_state(c, d_equalityEngine),
+      d_state(c, d_equalityEngine, d_valuation),
       d_im(*this, c, u, d_state, out),
-      d_conflict(c, false),
       d_nf_pairs(c),
       d_pregistered_terms_cache(u),
       d_registered_terms_cache(u),
-      d_length_lemma_terms_cache(u),
       d_preproc(&d_sk_cache, u),
       d_extf_infer_cache(c),
       d_ee_disequalities(c),
@@ -246,54 +244,24 @@ void TheoryStrings::propagate(Effort e) {
 bool TheoryStrings::propagate(TNode literal) {
   Debug("strings-propagate") << "TheoryStrings::propagate(" << literal  << ")" << std::endl;
   // If already in conflict, no more propagation
-  if (d_conflict) {
+  if (d_state.isInConflict())
+  {
     Debug("strings-propagate") << "TheoryStrings::propagate(" << literal << "): already in conflict" << std::endl;
     return false;
   }
   // Propagate out
   bool ok = d_out->propagate(literal);
   if (!ok) {
-    d_conflict = true;
+    d_state.setConflict();
   }
   return ok;
 }
 
-/** explain */
-void TheoryStrings::explain(TNode literal, std::vector<TNode>& assumptions) {
-  Debug("strings-explain") << "Explain " << literal << " " << d_conflict << std::endl;
-  bool polarity = literal.getKind() != kind::NOT;
-  TNode atom = polarity ? literal : literal[0];
-  unsigned ps = assumptions.size();
-  std::vector< TNode > tassumptions;
-  if (atom.getKind() == kind::EQUAL) {
-    if( atom[0]!=atom[1] ){
-      Assert(d_equalityEngine.hasTerm(atom[0]));
-      Assert(d_equalityEngine.hasTerm(atom[1]));
-      d_equalityEngine.explainEquality(atom[0], atom[1], polarity, tassumptions);
-    }
-  } else {
-    d_equalityEngine.explainPredicate(atom, polarity, tassumptions);
-  }
-  for( unsigned i=0; i<tassumptions.size(); i++ ){
-    if( std::find( assumptions.begin(), assumptions.end(), tassumptions[i] )==assumptions.end() ){
-      assumptions.push_back( tassumptions[i] );
-    }
-  }
-  if (Debug.isOn("strings-explain-debug"))
-  {
-    Debug("strings-explain-debug") << "Explanation for " << literal << " was "
-                                   << std::endl;
-    for (unsigned i = ps; i < assumptions.size(); i++)
-    {
-      Debug("strings-explain-debug") << "   " << assumptions[i] << std::endl;
-    }
-  }
-}
 
 Node TheoryStrings::explain( TNode literal ){
   Debug("strings-explain") << "explain called on " << literal << std::endl;
   std::vector< TNode > assumptions;
-  explain( literal, assumptions );
+  d_im.explain(literal, assumptions);
   if( assumptions.empty() ){
     return d_true;
   }else if( assumptions.size()==1 ){
@@ -556,7 +524,7 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
   std::map< Node, Node > processed;
   std::vector< std::vector< Node > > col;
   std::vector< Node > lts;
-  separateByLength( nodes, col, lts );
+  d_state.separateByLength(nodes, col, lts);
   //step 1 : get all values for known lengths
   std::vector< Node > lts_values;
   std::map<unsigned, Node> values_used;
@@ -905,7 +873,8 @@ void TheoryStrings::check(Effort e) {
   // Trace("strings-process") << "Theory of strings, check : " << e << std::endl;
   Trace("strings-check-debug")
       << "Theory of strings, check : " << e << std::endl;
-  while ( !done() && !d_conflict ) {
+  while (!done() && !d_state.isInConflict())
+  {
     // Get all the assertions
     Assertion assertion = get();
     TNode fact = assertion.assertion;
@@ -922,7 +891,8 @@ void TheoryStrings::check(Effort e) {
   Assert(d_strategy_init);
   std::map<Effort, std::pair<unsigned, unsigned> >::iterator itsr =
       d_strat_steps.find(e);
-  if (!d_conflict && !d_valuation.needCheck() && itsr != d_strat_steps.end())
+  if (!d_state.isInConflict() && !d_valuation.needCheck()
+      && itsr != d_strat_steps.end())
   {
     Trace("strings-check-debug")
         << "Theory of strings " << e << " effort check " << std::endl;
@@ -979,15 +949,15 @@ void TheoryStrings::check(Effort e) {
         Trace("strings-check") << "  ...finish run strategy: ";
         Trace("strings-check") << (addedFact ? "addedFact " : "");
         Trace("strings-check") << (addedLemma ? "addedLemma " : "");
-        Trace("strings-check") << (d_conflict ? "conflict " : "");
-        if (!addedFact && !addedLemma && !d_conflict)
+        Trace("strings-check") << (d_state.isInConflict() ? "conflict " : "");
+        if (!addedFact && !addedLemma && !d_state.isInConflict())
         {
           Trace("strings-check") << "(none)";
         }
         Trace("strings-check") << std::endl;
       }
       // repeat if we did not add a lemma or conflict
-    }while( !d_conflict && !addedLemma && addedFact );
+    } while (!d_state.isInConflict() && !addedLemma && addedFact);
   }
   Trace("strings-check") << "Theory of strings, done check : " << e << std::endl;
   Assert(!d_im.hasPendingFact());
@@ -1010,7 +980,7 @@ void TheoryStrings::checkExtfReductions( int effort ) {
   Trace("strings-process") << "  checking " << extf.size() << " active extf"
                            << std::endl;
   for( unsigned i=0; i<extf.size(); i++ ){
-    Assert(!d_conflict);
+    Assert(!d_state.isInConflict());
     Node n = extf[i];
     Trace("strings-process") << "  check " << n << ", active in model="
                              << d_extf_info_tmp[n].d_model_active << std::endl;
@@ -1058,9 +1028,10 @@ void TheoryStrings::checkMemberships()
 
 /** Conflict when merging two constants */
 void TheoryStrings::conflict(TNode a, TNode b){
-  if( !d_conflict ){
+  if (!d_state.isInConflict())
+  {
     Debug("strings-conflict") << "Making conflict..." << std::endl;
-    d_conflict = true;
+    d_state.setConflict();
     Node conflictNode;
     conflictNode = explain( a.eqNode(b) );
     Trace("strings-conflict") << "CONFLICT: Eq engine conflict : " << conflictNode << std::endl;
@@ -1282,7 +1253,7 @@ void TheoryStrings::assertPendingFact(Node atom, bool polarity, Node exp) {
     }
   }
   // process the conflict
-  if (!d_conflict)
+  if (!d_state.isInConflict())
   {
     Node pc = d_state.getPendingConflict();
     if (!pc.isNull())
@@ -1291,8 +1262,8 @@ void TheoryStrings::assertPendingFact(Node atom, bool polarity, Node exp) {
       a.push_back(pc);
       Trace("strings-pending")
           << "Process pending conflict " << pc << std::endl;
-      Node conflictNode = mkExplain(a);
-      d_conflict = true;
+      Node conflictNode = d_im.mkExplain(a);
+      d_state.setConflict();
       Trace("strings-conflict")
           << "CONFLICT: Eager prefix : " << conflictNode << std::endl;
       d_out->conflict(conflictNode);
@@ -1678,7 +1649,8 @@ void TheoryStrings::checkExtfEval( int effort ) {
             Trace("strings-extf") << "  resolve extf : " << sn << " -> " << nrc << std::endl;
             d_im.sendInference(
                 einfo.d_exp, conc, effort == 0 ? "EXTF" : "EXTF-N", true);
-            if( d_conflict ){
+            if (!d_state.isInConflict())
+            {
               Trace("strings-extf-debug") << "  conflict, return." << std::endl;
               return;
             }
@@ -2161,7 +2133,7 @@ void TheoryStrings::checkFlatForms()
       {
         bool isRev = r == 1;
         checkFlatForm(it->second, start, isRev);
-        if (d_conflict)
+        if (d_state.isInConflict())
         {
           return;
         }
@@ -2414,7 +2386,7 @@ void TheoryStrings::checkFlatForm(std::vector<Node>& eqc,
       std::stringstream ss;
       ss << infType;
       d_im.sendInference(exp, conc, ss.str().c_str());
-      if (d_conflict)
+      if (d_state.isInConflict())
       {
         return;
       }
@@ -3043,7 +3015,7 @@ void TheoryStrings::doInferInfo(const InferInfo& ii)
   {
     for (const Node& n : sks.second)
     {
-      registerLength(n, sks.first);
+      d_im.registerLength(n, sks.first);
     }
   }
 }
@@ -3075,7 +3047,7 @@ void TheoryStrings::processSimpleNEq(NormalForm& nfi,
         unsigned index_k = index;
         std::vector< Node > curr_exp;
         NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, curr_exp);
-        while (!d_conflict && index_k < (nfkv.size() - rproc))
+        while (!d_state.isInConflict() && index_k < (nfkv.size() - rproc))
         {
           //can infer that this string must be empty
           Node eq = nfkv[index_k].eqNode(d_emptyString);
@@ -3399,7 +3371,8 @@ void TheoryStrings::processSimpleNEq(NormalForm& nfi,
                       Node lt1 = e==0 ? length_term_i : length_term_j;
                       Node lt2 = e==0 ? length_term_j : length_term_i;
                       Node ent_lit = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::GT, lt1, lt2 ) );
-                      std::pair<bool, Node> et = d_valuation.entailmentCheck( THEORY_OF_TYPE_BASED, ent_lit );
+                      std::pair<bool, Node> et = d_state.entailmentCheck(
+                          THEORY_OF_TYPE_BASED, ent_lit);
                       if( et.first ){
                         Trace("strings-entail") << "Strings entailment : " << ent_lit << " is entailed in the current context." << std::endl;
                         Trace("strings-entail") << "  explanation was : " << et.second << std::endl;
@@ -3595,7 +3568,7 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(NormalForm& nfi,
     }
   }
 
-  Node ant = mkExplain(info.d_ant);
+  Node ant = d_im.mkExplain(info.d_ant);
   info.d_ant.clear();
   info.d_antn.push_back(ant);
 
@@ -3674,7 +3647,7 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(NormalForm& nfi,
     // right
     Node sk_w = d_sk_cache.mkSkolem("w_loop");
     Node sk_y = d_sk_cache.mkSkolem("y_loop");
-    registerLength(sk_y, LENGTH_GEQ_ONE);
+    d_im.registerLength(sk_y, LENGTH_GEQ_ONE);
     Node sk_z = d_sk_cache.mkSkolem("z_loop");
     // t1 * ... * tn = y * z
     Node conc1 = t_yz.eqNode(utils::mkNConcat(sk_y, sk_z));
@@ -3782,7 +3755,7 @@ void TheoryStrings::processDeq( Node ni, Node nj ) {
                 {
                   Node sk = d_sk_cache.mkSkolemCached(
                       nconst_k, SkolemCache::SK_ID_DC_SPT, "dc_spt");
-                  registerLength(sk, LENGTH_ONE);
+                  d_im.registerLength(sk, LENGTH_ONE);
                   Node skr =
                       d_sk_cache.mkSkolemCached(nconst_k,
                                                 SkolemCache::SK_ID_DC_SPT_REM,
@@ -3831,7 +3804,7 @@ void TheoryStrings::processDeq( Node ni, Node nj ) {
                   i, j, SkolemCache::SK_ID_DEQ_Y, "y_dsplit");
               Node sk3 = d_sk_cache.mkSkolemCached(
                   i, j, SkolemCache::SK_ID_DEQ_Z, "z_dsplit");
-              registerLength(sk3, LENGTH_GEQ_ONE);
+              d_im.registerLength(sk3, LENGTH_GEQ_ONE);
               //Node nemp = sk3.eqNode(d_emptyString).negate();
               //conc.push_back(nemp);
               Node lsk1 = utils::mkNLength(sk1);
@@ -4064,7 +4037,7 @@ void TheoryStrings::registerTerm( Node n, int effort ) {
       // can register length term if it does not rewrite
       if (lsum == lsumb)
       {
-        registerLength(n, LENGTH_SPLIT);
+        d_im.registerLength(n, LENGTH_SPLIT);
         return;
       }
     }
@@ -4080,8 +4053,8 @@ void TheoryStrings::registerTerm( Node n, int effort ) {
     // implied.
     if (n.isConst() || n.getKind() == STRING_CONCAT)
     {
-      // add to length lemma cache, i.e. do not send length lemma for sk.
-      d_length_lemma_terms_cache.insert(sk);
+      // do not send length lemma for sk.
+      d_im.registerLength(sk, LENGTH_IGNORE);
     }
     Trace("strings-assert") << "(assert " << eq << ")" << std::endl;
     d_out->lemma(eq);
@@ -4144,138 +4117,6 @@ void TheoryStrings::registerTerm( Node n, int effort ) {
   }
 }
 
-void TheoryStrings::registerLength(Node n, LengthStatus s)
-{
-  if (d_length_lemma_terms_cache.find(n) != d_length_lemma_terms_cache.end())
-  {
-    return;
-  }
-  d_length_lemma_terms_cache.insert(n);
-
-  NodeManager* nm = NodeManager::currentNM();
-  Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
-
-  if (s == LENGTH_GEQ_ONE)
-  {
-    Node neq_empty = n.eqNode(d_emptyString).negate();
-    Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
-    Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
-    Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
-                           << std::endl;
-    Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
-    d_out->lemma(len_geq_one);
-    return;
-  }
-
-  if (s == LENGTH_ONE)
-  {
-    Node len_one = n_len.eqNode(d_one);
-    Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
-                           << std::endl;
-    Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
-    d_out->lemma(len_one);
-    return;
-  }
-  Assert(s == LENGTH_SPLIT);
-
-  if( options::stringSplitEmp() || !options::stringLenGeqZ() ){
-    Node n_len_eq_z = n_len.eqNode( d_zero );
-    Node n_len_eq_z_2 = n.eqNode( d_emptyString );
-    Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
-    case_empty = Rewriter::rewrite(case_empty);
-    Node case_nempty = nm->mkNode(GT, n_len, d_zero);
-    if (!case_empty.isConst())
-    {
-      Node lem = nm->mkNode(OR, case_empty, case_nempty);
-      d_out->lemma(lem);
-      Trace("strings-lemma") << "Strings::Lemma LENGTH >= 0 : " << lem
-                             << std::endl;
-      // prefer trying the empty case first
-      // notice that requirePhase must only be called on rewritten literals that
-      // occur in the CNF stream.
-      n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
-      Assert(!n_len_eq_z.isConst());
-      d_out->requirePhase(n_len_eq_z, true);
-      n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
-      Assert(!n_len_eq_z_2.isConst());
-      d_out->requirePhase(n_len_eq_z_2, true);
-    }
-    else if (!case_empty.getConst<bool>())
-    {
-      // the rewriter knows that n is non-empty
-      Trace("strings-lemma")
-          << "Strings::Lemma LENGTH > 0 (non-empty): " << case_nempty
-          << std::endl;
-      d_out->lemma(case_nempty);
-    }
-    else
-    {
-      // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
-      // n ---> "". Since this method is only called on non-constants n, it must
-      // be that n = "" ^ len( n ) = 0 does not rewrite to true.
-      Assert(false);
-    }
-  }
-
-  // additionally add len( x ) >= 0 ?
-  if( options::stringLenGeqZ() ){
-    Node n_len_geq = nm->mkNode(kind::GEQ, n_len, d_zero);
-    n_len_geq = Rewriter::rewrite( n_len_geq );
-    d_out->lemma( n_len_geq );
-  }
-}
-
-Node TheoryStrings::mkExplain(const std::vector<Node>& a)
-{
-  std::vector< Node > an;
-  return mkExplain( a, an );
-}
-
-Node TheoryStrings::mkExplain(const std::vector<Node>& a,
-                              const std::vector<Node>& an)
-{
-  std::vector< TNode > antec_exp;
-  // copy to processing vector
-  std::vector<Node> aconj;
-  for (const Node& ac : a)
-  {
-    utils::flattenOp(AND, ac, aconj);
-  }
-  for (const Node& apc : aconj)
-  {
-    Assert(apc.getKind() != AND);
-    Debug("strings-explain") << "Add to explanation " << apc << std::endl;
-    if (apc.getKind() == NOT && apc[0].getKind() == EQUAL)
-    {
-      Assert(d_equalityEngine.hasTerm(apc[0][0]));
-      Assert(d_equalityEngine.hasTerm(apc[0][1]));
-      // ensure that we are ready to explain the disequality
-      AlwaysAssert(d_equalityEngine.areDisequal(apc[0][0], apc[0][1], true));
-    }
-    Assert(apc.getKind() != EQUAL || d_equalityEngine.areEqual(apc[0], apc[1]));
-    // now, explain
-    explain(apc, antec_exp);
-  }
-  for (const Node& anc : an)
-  {
-    if (std::find(antec_exp.begin(), antec_exp.end(), anc) == antec_exp.end())
-    {
-      Debug("strings-explain")
-          << "Add to explanation (new literal) " << anc << std::endl;
-      antec_exp.push_back(anc);
-    }
-  }
-  Node ant;
-  if( antec_exp.empty() ) {
-    ant = d_true;
-  } else if( antec_exp.size()==1 ) {
-    ant = antec_exp[0];
-  } else {
-    ant = NodeManager::currentNM()->mkNode( kind::AND, antec_exp );
-  }
-  return ant;
-}
-
 void TheoryStrings::checkNormalFormsDeq()
 {
   std::vector< std::vector< Node > > cols;
@@ -4309,7 +4150,7 @@ void TheoryStrings::checkNormalFormsDeq()
 
   if (!d_im.hasProcessed())
   {
-    separateByLength( d_strings_eqc, cols, lts );
+    d_state.separateByLength(d_strings_eqc, cols, lts);
     for( unsigned i=0; i<cols.size(); i++ ){
       if (cols[i].size() > 1 && !d_im.hasPendingLemma())
       {
@@ -4334,7 +4175,7 @@ void TheoryStrings::checkNormalFormsDeq()
             {
               if (d_state.areDisequal(cols[i][j], cols[i][k]))
               {
-                Assert(!d_conflict);
+                Assert(!d_state.isInConflict());
                 if (Trace.isOn("strings-solve"))
                 {
                   Trace("strings-solve") << "- Compare " << cols[i][j] << " ";
@@ -4419,7 +4260,7 @@ void TheoryStrings::checkCardinality() {
   //  TODO: revisit this?
   std::vector< std::vector< Node > > cols;
   std::vector< Node > lts;
-  separateByLength( d_strings_eqc, cols, lts );
+  d_state.separateByLength(d_strings_eqc, cols, lts);
 
   Trace("strings-card") << "Check cardinality...." << std::endl;
   for( unsigned i = 0; i<cols.size(); ++i ) {
@@ -4513,39 +4354,6 @@ void TheoryStrings::checkCardinality() {
   Trace("strings-card") << "...end check cardinality" << std::endl;
 }
 
-void TheoryStrings::separateByLength(std::vector< Node >& n,
-  std::vector< std::vector< Node > >& cols,
-  std::vector< Node >& lts ) {
-  unsigned leqc_counter = 0;
-  std::map< Node, unsigned > eqc_to_leqc;
-  std::map< unsigned, Node > leqc_to_eqc;
-  std::map< unsigned, std::vector< Node > > eqc_to_strings;
-  for( unsigned i=0; i<n.size(); i++ ) {
-    Node eqc = n[i];
-    Assert(d_equalityEngine.getRepresentative(eqc) == eqc);
-    EqcInfo* ei = d_state.getOrMakeEqcInfo(eqc, false);
-    Node lt = ei ? ei->d_lengthTerm : Node::null();
-    if( !lt.isNull() ){
-      lt = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, lt );
-      Node r = d_equalityEngine.getRepresentative( lt );
-      if( eqc_to_leqc.find( r )==eqc_to_leqc.end() ){
-        eqc_to_leqc[r] = leqc_counter;
-        leqc_to_eqc[leqc_counter] = r;
-        leqc_counter++;
-      }
-      eqc_to_strings[ eqc_to_leqc[r] ].push_back( eqc );
-    }else{
-      eqc_to_strings[leqc_counter].push_back( eqc );
-      leqc_counter++;
-    }
-  }
-  for( std::map< unsigned, std::vector< Node > >::iterator it = eqc_to_strings.begin(); it != eqc_to_strings.end(); ++it ){
-    cols.push_back( std::vector< Node >() );
-    cols.back().insert( cols.back().end(), it->second.begin(), it->second.end() );
-    lts.push_back( leqc_to_eqc[it->first] );
-  }
-}
-
 void TheoryStrings::printConcat( std::vector< Node >& n, const char * c ) {
   for( unsigned i=0; i<n.size(); i++ ){
     if( i>0 ) Trace(c) << " ++ ";
@@ -4680,7 +4488,8 @@ void TheoryStrings::runInferStep(InferStep s, int effort)
   Trace("strings-process") << "Done " << s
                            << ", addedFact = " << d_im.hasPendingFact()
                            << ", addedLemma = " << d_im.hasPendingLemma()
-                           << ", d_conflict = " << d_conflict << std::endl;
+                           << ", conflict = " << d_state.isInConflict()
+                           << std::endl;
 }
 
 bool TheoryStrings::hasStrategyEffort(Effort e) const
@@ -4789,7 +4598,7 @@ void TheoryStrings::runStrategy(unsigned sbegin, unsigned send)
     else
     {
       runInferStep(curr, d_infer_step_effort[i]);
-      if (d_conflict)
+      if (d_state.isInConflict())
       {
         break;
       }
diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h
index 54ea0d158..8e2af8434 100644
--- a/src/theory/strings/theory_strings.h
+++ b/src/theory/strings/theory_strings.h
@@ -109,7 +109,6 @@ class TheoryStrings : public Theory {
  public:
   void propagate(Effort e) override;
   bool propagate(TNode literal);
-  void explain( TNode literal, std::vector<TNode>& assumptions );
   Node explain(TNode literal) override;
   eq::EqualityEngine* getEqualityEngine() override { return &d_equalityEngine; }
   bool getCurrentSubstitution(int effort,
@@ -222,8 +221,6 @@ class TheoryStrings : public Theory {
   SolverState d_state;
   /** The (custom) output channel of the theory of strings */
   InferenceManager d_im;
-  /** Are we in conflict */
-  context::CDO<bool> d_conflict;
   /** map from terms to their normal forms */
   std::map<Node, NormalForm> d_normal_form;
   /** get normal form */
@@ -237,7 +234,6 @@ class TheoryStrings : public Theory {
   // preReg cache
   NodeSet d_pregistered_terms_cache;
   NodeSet d_registered_terms_cache;
-  NodeSet d_length_lemma_terms_cache;
   /** preprocessing utility, for performing strings reductions */
   StringsPreprocess d_preproc;
   // extended functions inferences cache
@@ -359,23 +355,6 @@ private:
 
  private:
 
-  /** register length
-   *
-   * This method is called on non-constant string terms n. It sends a lemma
-   * on the output channel that ensures that the length n satisfies its assigned
-   * status (given by argument s).
-   *
-   * If the status is LENGTH_ONE, we send the lemma len( n ) = 1.
-   *
-   * If the status is LENGTH_GEQ, we send a lemma n != "" ^ len( n ) > 0.
-   *
-   * If the status is LENGTH_SPLIT, we send a send a lemma of the form:
-   *   ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0
-   * This method also ensures that, when applicable, the left branch is taken
-   * first via calls to requirePhase.
-   */
-  void registerLength(Node n, LengthStatus s);
-
   /** cache of all skolems */
   SkolemCache d_sk_cache;
 
@@ -636,23 +615,8 @@ private:
    */
   void registerTerm(Node n, int effort);
 
-  /** make explanation
-   *
-   * This returns a node corresponding to the explanation of formulas in a,
-   * interpreted conjunctively. The returned node is a conjunction of literals
-   * that have been asserted to the equality engine.
-   */
-  Node mkExplain(const std::vector<Node>& a);
-  /** Same as above, but the new literals an are append to the result */
-  Node mkExplain(const std::vector<Node>& a, const std::vector<Node>& an);
-
  protected:
 
-
-  // separate into collections with equal length
-  void separateByLength(std::vector<Node>& n,
-                        std::vector<std::vector<Node> >& col,
-                        std::vector<Node>& lts);
   void printConcat(std::vector<Node>& n, const char* c);
 
   // Symbolic Regular Expression