From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Thu, 25 Jul 2019 14:43:19 +0000 (-0500)
Subject: Split infer info data structure in strings (#3107)
X-Git-Tag: cvc5-1.0.0~4076
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=18d42bc7f76b5b144ec498d3b4d3e906224e629f;p=cvc5.git

Split infer info data structure in strings (#3107)
---

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 058e848d6..9da9bd603 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -630,6 +630,8 @@ libcvc4_add_sources(
   theory/shared_terms_database.h
   theory/sort_inference.cpp
   theory/sort_inference.h
+  theory/strings/infer_info.cpp
+  theory/strings/infer_info.h
   theory/strings/inference_manager.cpp
   theory/strings/inference_manager.h
   theory/strings/normal_form.cpp
diff --git a/src/theory/strings/infer_info.cpp b/src/theory/strings/infer_info.cpp
new file mode 100644
index 000000000..a20f608a6
--- /dev/null
+++ b/src/theory/strings/infer_info.cpp
@@ -0,0 +1,44 @@
+/*********************                                                        */
+/*! \file infer_info.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of inference information utility.
+ **/
+
+#include "theory/strings/infer_info.h"
+
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+std::ostream& operator<<(std::ostream& out, Inference i)
+{
+  switch (i)
+  {
+    case INFER_SSPLIT_CST_PROP: out << "S-Split(CST-P)-prop"; break;
+    case INFER_SSPLIT_VAR_PROP: out << "S-Split(VAR)-prop"; break;
+    case INFER_LEN_SPLIT: out << "Len-Split(Len)"; break;
+    case INFER_LEN_SPLIT_EMP: out << "Len-Split(Emp)"; break;
+    case INFER_SSPLIT_CST_BINARY: out << "S-Split(CST-P)-binary"; break;
+    case INFER_SSPLIT_CST: out << "S-Split(CST-P)"; break;
+    case INFER_SSPLIT_VAR: out << "S-Split(VAR)"; break;
+    case INFER_FLOOP: out << "F-Loop"; break;
+    default: out << "?"; break;
+  }
+  return out;
+}
+
+InferInfo::InferInfo() : d_id(INFER_NONE), d_index(0), d_rev(false) {}
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
diff --git a/src/theory/strings/infer_info.h b/src/theory/strings/infer_info.h
new file mode 100644
index 000000000..9b19eba4b
--- /dev/null
+++ b/src/theory/strings/infer_info.h
@@ -0,0 +1,153 @@
+/*********************                                                        */
+/*! \file infer_info.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Inference information utility
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__STRINGS__INFER_INFO_H
+#define CVC4__THEORY__STRINGS__INFER_INFO_H
+
+#include <map>
+#include <vector>
+#include "expr/node.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+/** Types of inferences used in the procedure
+ *
+ * These are variants of the inference rules in Figures 3-5 of Liang et al.
+ * "A DPLL(T) Solver for a Theory of Strings and Regular Expressions", CAV 2014.
+ */
+enum Inference
+{
+  INFER_NONE = 0,
+  // string split constant propagation, for example:
+  //     x = y, x = "abc", y = y1 ++ "b" ++ y2
+  //       implies y1 = "a" ++ y1'
+  INFER_SSPLIT_CST_PROP = 1,
+  // string split variable propagation, for example:
+  //     x = y, x = x1 ++ x2, y = y1 ++ y2, len( x1 ) >= len( y1 )
+  //       implies x1 = y1 ++ x1'
+  // This is inspired by Zheng et al CAV 2015.
+  INFER_SSPLIT_VAR_PROP,
+  // length split, for example:
+  //     len( x1 ) = len( y1 ) V len( x1 ) != len( y1 )
+  // This is inferred when e.g. x = y, x = x1 ++ x2, y = y1 ++ y2.
+  INFER_LEN_SPLIT,
+  // length split empty, for example:
+  //     z = "" V z != ""
+  // This is inferred when, e.g. x = y, x = z ++ x1, y = y1 ++ z
+  INFER_LEN_SPLIT_EMP,
+  // string split constant binary, for example:
+  //     x1 = "aaaa" ++ x1' V "aaaa" = x1 ++ x1'
+  // This is inferred when, e.g. x = y, x = x1 ++ x2, y = "aaaaaaaa" ++ y2.
+  // This inference is disabled by default and is enabled by stringBinaryCsp().
+  INFER_SSPLIT_CST_BINARY,
+  // string split constant
+  //    x = y, x = "c" ++ x2, y = y1 ++ y2, y1 != ""
+  //      implies y1 = "c" ++ y1'
+  // This is a special case of F-Split in Figure 5 of Liang et al CAV 2014.
+  INFER_SSPLIT_CST,
+  // string split variable, for example:
+  //    x = y, x = x1 ++ x2, y = y1 ++ y2
+  //      implies x1 = y1 ++ x1' V y1 = x1 ++ y1'
+  // This is rule F-Split in Figure 5 of Liang et al CAV 2014.
+  INFER_SSPLIT_VAR,
+  // flat form loop, for example:
+  //    x = y, x = x1 ++ z, y = z ++ y2
+  //      implies z = u2 ++ u1, u in ( u1 ++ u2 )*, x1 = u2 ++ u, y2 = u ++ u1
+  //        for fresh u, u1, u2.
+  // This is the rule F-Loop from Figure 5 of Liang et al CAV 2014.
+  INFER_FLOOP,
+};
+std::ostream& operator<<(std::ostream& out, Inference i);
+
+/**
+ * Length status, used for indicating the length constraints for Skolems
+ * introduced by the theory of strings.
+ */
+enum LengthStatus
+{
+  // The length of the Skolem is not specified, and should be split on.
+  LENGTH_SPLIT,
+  // The length of the Skolem is exactly one.
+  LENGTH_ONE,
+  // The length of the Skolem is greater than or equal to one.
+  LENGTH_GEQ_ONE
+};
+
+/**
+ * This data structure encapsulates an inference for strings. This includes
+ * the form of the inference, as well as the side effects it generates.
+ */
+class InferInfo
+{
+ public:
+  InferInfo();
+  /**
+   * The identifier for the inference, indicating the kind of reasoning used
+   * for this conclusion.
+   */
+  Inference d_id;
+  /** The conclusion of the inference */
+  Node d_conc;
+  /**
+   * The antecedant(s) of the inference, interpreted conjunctively. These are
+   * literals that currently hold in the equality engine.
+   */
+  std::vector<Node> d_ant;
+  /**
+   * The "new literal" antecedant(s) of the inference, interpreted
+   * conjunctively. These are literals that were needed to show the conclusion
+   * but do not currently hold in the equality engine.
+   */
+  std::vector<Node> d_antn;
+  /**
+   * A list of new skolems introduced as a result of this inference. They
+   * are mapped to by a length status, indicating the length constraint that
+   * can be assumed for them.
+   */
+  std::map<LengthStatus, std::vector<Node> > d_new_skolem;
+  /**
+   * The pending phase requirements, see InferenceManager::sendPhaseRequirement.
+   */
+  std::map<Node, bool> d_pending_phase;
+  /**
+   * The index in the normal forms under which this inference is addressing.
+   * For example, if the inference is inferring x = y from |x|=|y| and
+   *   w ++ x ++ ... = w ++ y ++ ...
+   * then d_index is 1, since x and y are at index 1 in these concat terms.
+   */
+  unsigned d_index;
+  /**
+   * The normal form pair that is cached as a result of this inference.
+   */
+  Node d_nf_pair[2];
+  /** for debugging
+   *
+   * The base pair of strings d_i/d_j that led to the inference, and whether
+   * (d_rev) we were processing the normal forms of these strings in reverse
+   * direction.
+   */
+  Node d_i;
+  Node d_j;
+  bool d_rev;
+};
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
+
+#endif /* CVC4__THEORY__STRINGS__THEORY_STRINGS_H */
diff --git a/src/theory/strings/inference_manager.cpp b/src/theory/strings/inference_manager.cpp
index ec144aa6d..f064a26e2 100644
--- a/src/theory/strings/inference_manager.cpp
+++ b/src/theory/strings/inference_manager.cpp
@@ -99,8 +99,8 @@ bool InferenceManager::sendInternalInference(std::vector<Node>& exp,
   return true;
 }
 
-void InferenceManager::sendInference(std::vector<Node>& exp,
-                                     std::vector<Node>& exp_n,
+void InferenceManager::sendInference(const std::vector<Node>& exp,
+                                     const std::vector<Node>& exp_n,
                                      Node eq,
                                      const char* c,
                                      bool asLemma)
@@ -164,7 +164,7 @@ void InferenceManager::sendInference(std::vector<Node>& exp,
   }
 }
 
-void InferenceManager::sendInference(std::vector<Node>& exp,
+void InferenceManager::sendInference(const std::vector<Node>& exp,
                                      Node eq,
                                      const char* c,
                                      bool asLemma)
@@ -173,6 +173,13 @@ void InferenceManager::sendInference(std::vector<Node>& exp,
   sendInference(exp, exp_n, eq, c, asLemma);
 }
 
+void InferenceManager::sendInference(const InferInfo& i)
+{
+  std::stringstream ssi;
+  ssi << i.d_id;
+  sendInference(i.d_ant, i.d_antn, i.d_conc, ssi.str().c_str(), true);
+}
+
 void InferenceManager::sendLemma(Node ant, Node conc, const char* c)
 {
   if (conc.isNull() || conc == d_false)
diff --git a/src/theory/strings/inference_manager.h b/src/theory/strings/inference_manager.h
index 71651f7df..bb78b4a1a 100644
--- a/src/theory/strings/inference_manager.h
+++ b/src/theory/strings/inference_manager.h
@@ -24,6 +24,7 @@
 #include "context/context.h"
 #include "expr/node.h"
 #include "theory/output_channel.h"
+#include "theory/strings/infer_info.h"
 #include "theory/uf/equality_engine.h"
 
 namespace CVC4 {
@@ -125,16 +126,22 @@ class InferenceManager
    * If the flag asLemma is true, then this method will send a lemma instead
    * of an inference whenever applicable.
    */
-  void sendInference(std::vector<Node>& exp,
-                     std::vector<Node>& exp_n,
+  void sendInference(const std::vector<Node>& exp,
+                     const std::vector<Node>& exp_n,
                      Node eq,
                      const char* c,
                      bool asLemma = false);
   /** same as above, but where exp_n is empty */
-  void sendInference(std::vector<Node>& exp,
+  void sendInference(const std::vector<Node>& exp,
                      Node eq,
                      const char* c,
                      bool asLemma = false);
+  /** Send inference
+   *
+   * Makes the appropriate call to send inference based on the infer info
+   * data structure (see sendInference documentation above).
+   */
+  void sendInference(const InferInfo& i);
   /** Send split
    *
    * This requests that ( a = b V a != b ) is sent on the output channel as a
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index 5e7061d22..952d82c21 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -39,23 +39,6 @@ namespace CVC4 {
 namespace theory {
 namespace strings {
 
-std::ostream& operator<<(std::ostream& out, Inference i)
-{
-  switch (i)
-  {
-    case INFER_SSPLIT_CST_PROP: out << "S-Split(CST-P)-prop"; break;
-    case INFER_SSPLIT_VAR_PROP: out << "S-Split(VAR)-prop"; break;
-    case INFER_LEN_SPLIT: out << "Len-Split(Len)"; break;
-    case INFER_LEN_SPLIT_EMP: out << "Len-Split(Emp)"; break;
-    case INFER_SSPLIT_CST_BINARY: out << "S-Split(CST-P)-binary"; break;
-    case INFER_SSPLIT_CST: out << "S-Split(CST-P)"; break;
-    case INFER_SSPLIT_VAR: out << "S-Split(VAR)"; break;
-    case INFER_FLOOP: out << "F-Loop"; break;
-    default: out << "?"; break;
-  }
-  return out;
-}
-
 std::ostream& operator<<(std::ostream& out, InferStep s)
 {
   switch (s)
@@ -2947,25 +2930,24 @@ void TheoryStrings::processNEqc(std::vector<NormalForm>& normal_forms)
       set_use_index = true;
     }
   }
+  doInferInfo(pinfer[use_index]);
+}
+
+void TheoryStrings::doInferInfo(const InferInfo& ii)
+{
   // send the inference
-  if (!pinfer[use_index].d_nf_pair[0].isNull())
+  if (!ii.d_nf_pair[0].isNull())
   {
-    Assert(!pinfer[use_index].d_nf_pair[1].isNull());
-    addNormalFormPair(pinfer[use_index].d_nf_pair[0],
-                      pinfer[use_index].d_nf_pair[1]);
-  }
-  std::stringstream ssi;
-  ssi << pinfer[use_index].d_id;
-  d_im.sendInference(pinfer[use_index].d_ant,
-                     pinfer[use_index].d_antn,
-                     pinfer[use_index].d_conc,
-                     ssi.str().c_str(),
-                     pinfer[use_index].sendAsLemma());
+    Assert(!ii.d_nf_pair[1].isNull());
+    addNormalFormPair(ii.d_nf_pair[0], ii.d_nf_pair[1]);
+  }
+  // send the inference
+  d_im.sendInference(ii);
   // Register the new skolems from this inference. We register them here
   // (lazily), since the code above has now decided to use the inference
   // at use_index that involves them.
   for (const std::pair<const LengthStatus, std::vector<Node> >& sks :
-       pinfer[use_index].d_new_skolem)
+       ii.d_new_skolem)
   {
     for (const Node& n : sks.second)
     {
@@ -2974,10 +2956,6 @@ void TheoryStrings::processNEqc(std::vector<NormalForm>& normal_forms)
   }
 }
 
-bool TheoryStrings::InferInfo::sendAsLemma() {
-  return true;
-}
-
 void TheoryStrings::processSimpleNEq(NormalForm& nfi,
                                      NormalForm& nfj,
                                      unsigned& index,
@@ -4110,50 +4088,44 @@ Node TheoryStrings::mkLength( Node t ) {
   return Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t ) );
 }
 
-Node TheoryStrings::mkExplain( std::vector< Node >& a ) {
+Node TheoryStrings::mkExplain(const std::vector<Node>& a)
+{
   std::vector< Node > an;
   return mkExplain( a, an );
 }
 
-Node TheoryStrings::mkExplain( std::vector< Node >& a, std::vector< Node >& an ) {
+Node TheoryStrings::mkExplain(const std::vector<Node>& a,
+                              const std::vector<Node>& an)
+{
   std::vector< TNode > antec_exp;
-  for( unsigned i=0; i<a.size(); i++ ) {
-    if( std::find( a.begin(), a.begin() + i, a[i] )==a.begin() + i ) {
-      bool exp = true;
-      Debug("strings-explain") << "Ask for explanation of " << a[i] << std::endl;
-      //assert
-      if(a[i].getKind() == kind::EQUAL) {
-        //Assert( hasTerm(a[i][0]) );
-        //Assert( hasTerm(a[i][1]) );
-        Assert( areEqual(a[i][0], a[i][1]) );
-        if( a[i][0]==a[i][1] ){
-          exp = false;
-        }
-      } else if( a[i].getKind()==kind::NOT && a[i][0].getKind()==kind::EQUAL ) {
-        Assert( hasTerm(a[i][0][0]) );
-        Assert( hasTerm(a[i][0][1]) );
-        AlwaysAssert( d_equalityEngine.areDisequal(a[i][0][0], a[i][0][1], true) );
-      }else if( a[i].getKind() == kind::AND ){
-        for( unsigned j=0; j<a[i].getNumChildren(); j++ ){
-          a.push_back( a[i][j] );
-        }
-        exp = false;
-      }
-      if( exp ) {
-        unsigned ps = antec_exp.size();
-        explain(a[i], antec_exp);
-        Debug("strings-explain") << "Done, explanation was : " << std::endl;
-        for( unsigned j=ps; j<antec_exp.size(); j++ ) {
-          Debug("strings-explain") << "  " << antec_exp[j] << std::endl;
-        }
-        Debug("strings-explain") << std::endl;
-      }
+  // copy to processing vector
+  std::vector<Node> aconj;
+  for (const Node& ac : a)
+  {
+    utils::flattenOp(AND, ac, aconj);
+  }
+  for (const Node& apc : aconj)
+  {
+    Assert(apc.getKind() != AND);
+    Debug("strings-explain") << "Add to explanation " << apc << std::endl;
+    if (apc.getKind() == NOT && apc[0].getKind() == EQUAL)
+    {
+      Assert(hasTerm(apc[0][0]));
+      Assert(hasTerm(apc[0][1]));
+      // ensure that we are ready to explain the disequality
+      AlwaysAssert(d_equalityEngine.areDisequal(apc[0][0], apc[0][1], true));
     }
+    Assert(apc.getKind() != EQUAL || d_equalityEngine.areEqual(apc[0], apc[1]));
+    // now, explain
+    explain(apc, antec_exp);
   }
-  for( unsigned i=0; i<an.size(); i++ ) {
-    if( std::find( an.begin(), an.begin() + i, an[i] )==an.begin() + i ){
-      Debug("strings-explain") << "Add to explanation (new literal) " << an[i] << std::endl;
-      antec_exp.push_back(an[i]);
+  for (const Node& anc : an)
+  {
+    if (std::find(antec_exp.begin(), antec_exp.end(), anc) == antec_exp.end())
+    {
+      Debug("strings-explain")
+          << "Add to explanation (new literal) " << anc << std::endl;
+      antec_exp.push_back(anc);
     }
   }
   Node ant;
@@ -4164,7 +4136,6 @@ Node TheoryStrings::mkExplain( std::vector< Node >& a, std::vector< Node >& an )
   } else {
     ant = NodeManager::currentNM()->mkNode( kind::AND, antec_exp );
   }
-  //ant = Rewriter::rewrite( ant );
   return ant;
 }
 
diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h
index a83a6ad12..e3bb2e719 100644
--- a/src/theory/strings/theory_strings.h
+++ b/src/theory/strings/theory_strings.h
@@ -24,6 +24,7 @@
 #include "expr/attribute.h"
 #include "expr/node_trie.h"
 #include "theory/decision_manager.h"
+#include "theory/strings/infer_info.h"
 #include "theory/strings/inference_manager.h"
 #include "theory/strings/normal_form.h"
 #include "theory/strings/regexp_elim.h"
@@ -46,55 +47,6 @@ namespace strings {
  *
  */
 
-/** Types of inferences used in the procedure
- *
- * These are variants of the inference rules in Figures 3-5 of Liang et al.
- * "A DPLL(T) Solver for a Theory of Strings and Regular Expressions", CAV 2014.
- */
-enum Inference
-{
-  INFER_NONE,
-  // string split constant propagation, for example:
-  //     x = y, x = "abc", y = y1 ++ "b" ++ y2
-  //       implies y1 = "a" ++ y1'
-  INFER_SSPLIT_CST_PROP,
-  // string split variable propagation, for example:
-  //     x = y, x = x1 ++ x2, y = y1 ++ y2, len( x1 ) >= len( y1 )
-  //       implies x1 = y1 ++ x1'
-  // This is inspired by Zheng et al CAV 2015.
-  INFER_SSPLIT_VAR_PROP,
-  // length split, for example:
-  //     len( x1 ) = len( y1 ) V len( x1 ) != len( y1 )
-  // This is inferred when e.g. x = y, x = x1 ++ x2, y = y1 ++ y2.
-  INFER_LEN_SPLIT,
-  // length split empty, for example:
-  //     z = "" V z != ""
-  // This is inferred when, e.g. x = y, x = z ++ x1, y = y1 ++ z
-  INFER_LEN_SPLIT_EMP,
-  // string split constant binary, for example:
-  //     x1 = "aaaa" ++ x1' V "aaaa" = x1 ++ x1'
-  // This is inferred when, e.g. x = y, x = x1 ++ x2, y = "aaaaaaaa" ++ y2.
-  // This inference is disabled by default and is enabled by stringBinaryCsp().
-  INFER_SSPLIT_CST_BINARY,
-  // string split constant
-  //    x = y, x = "c" ++ x2, y = y1 ++ y2, y1 != ""
-  //      implies y1 = "c" ++ y1'
-  // This is a special case of F-Split in Figure 5 of Liang et al CAV 2014.
-  INFER_SSPLIT_CST,
-  // string split variable, for example:
-  //    x = y, x = x1 ++ x2, y = y1 ++ y2
-  //      implies x1 = y1 ++ x1' V y1 = x1 ++ y1'
-  // This is rule F-Split in Figure 5 of Liang et al CAV 2014.
-  INFER_SSPLIT_VAR,
-  // flat form loop, for example:
-  //    x = y, x = x1 ++ z, y = z ++ y2
-  //      implies z = u2 ++ u1, u in ( u1 ++ u2 )*, x1 = u2 ++ u, y2 = u ++ u1
-  //        for fresh u, u1, u2.
-  // This is the rule F-Loop from Figure 5 of Liang et al CAV 2014.
-  INFER_FLOOP,
-};
-std::ostream& operator<<(std::ostream& out, Inference i);
-
 /** inference steps
  *
  * Corresponds to a step in the overall strategy of the strings solver. For
@@ -448,13 +400,6 @@ private:
   };
 
  private:
-  /** Length status, used for the registerLength function below */
-  enum LengthStatus
-  {
-    LENGTH_SPLIT,
-    LENGTH_ONE,
-    LENGTH_GEQ_ONE
-  };
 
   /** register length
    *
@@ -473,30 +418,6 @@ private:
    */
   void registerLength(Node n, LengthStatus s);
 
-  //------------------------- candidate inferences
-  class InferInfo
-  {
-   public:
-    /** for debugging
-     *
-     * The base pair of strings d_i/d_j that led to the inference, and whether
-     * (d_rev) we were processing the normal forms of these strings in reverse
-     * direction.
-     */
-    Node d_i;
-    Node d_j;
-    bool d_rev;
-    std::vector<Node> d_ant;
-    std::vector<Node> d_antn;
-    std::map<LengthStatus, std::vector<Node> > d_new_skolem;
-    Node d_conc;
-    Inference d_id;
-    std::map<Node, bool> d_pending_phase;
-    unsigned d_index;
-    Node d_nf_pair[2];
-    bool sendAsLemma();
-  };
-  //------------------------- end candidate inferences
   /** cache of all skolems */
   SkolemCache d_sk_cache;
 
@@ -738,6 +659,12 @@ private:
    * of atom, including calls to registerTerm.
    */
   void assertPendingFact(Node atom, bool polarity, Node exp);
+  /**
+   * This processes the infer info ii as an inference. In more detail, it calls
+   * the inference manager to process the inference, it introduces Skolems, and
+   * updates the set of normal form pairs.
+   */
+  void doInferInfo(const InferInfo& ii);
   /**
    * Adds equality a = b to the vector exp if a and b are distinct terms. It
    * must be the case that areEqual( a, b ) holds in this context.
@@ -778,9 +705,15 @@ private:
   inline Node mkConcat(const std::vector<Node>& c);
   inline Node mkLength(Node n);
 
-  /** mkExplain **/
-  Node mkExplain(std::vector<Node>& a);
-  Node mkExplain(std::vector<Node>& a, std::vector<Node>& an);
+  /** make explanation
+   *
+   * This returns a node corresponding to the explanation of formulas in a,
+   * interpreted conjunctively. The returned node is a conjunction of literals
+   * that have been asserted to the equality engine.
+   */
+  Node mkExplain(const std::vector<Node>& a);
+  /** Same as above, but the new literals an are append to the result */
+  Node mkExplain(const std::vector<Node>& a, const std::vector<Node>& an);
 
  protected:
 
diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp
index bb2054b0e..c8951e10a 100644
--- a/src/theory/strings/theory_strings_utils.cpp
+++ b/src/theory/strings/theory_strings_utils.cpp
@@ -23,7 +23,7 @@ namespace theory {
 namespace strings {
 namespace utils {
 
-Node mkAnd(std::vector<Node>& a)
+Node mkAnd(const std::vector<Node>& a)
 {
   std::vector<Node> au;
   for (const Node& ai : a)
@@ -44,6 +44,47 @@ Node mkAnd(std::vector<Node>& a)
   return NodeManager::currentNM()->mkNode(AND, au);
 }
 
+void flattenOp(Kind k, Node n, std::vector<Node>& conj)
+{
+  if (n.getKind() != k)
+  {
+    // easy case, just add to conj if non-duplicate
+    if (std::find(conj.begin(), conj.end(), n) == conj.end())
+    {
+      conj.push_back(n);
+    }
+    return;
+  }
+  // otherwise, traverse
+  std::unordered_set<TNode, TNodeHashFunction> visited;
+  std::unordered_set<TNode, TNodeHashFunction>::iterator it;
+  std::vector<TNode> visit;
+  TNode cur;
+  visit.push_back(n);
+  do
+  {
+    cur = visit.back();
+    visit.pop_back();
+    it = visited.find(cur);
+
+    if (it == visited.end())
+    {
+      visited.insert(cur);
+      if (cur.getKind() == k)
+      {
+        for (const Node& cn : cur)
+        {
+          visit.push_back(cn);
+        }
+      }
+      else if (std::find(conj.begin(), conj.end(), cur) == conj.end())
+      {
+        conj.push_back(cur);
+      }
+    }
+  } while (!visit.empty());
+}
+
 void getConcat(Node n, std::vector<Node>& c)
 {
   Kind k = n.getKind();
diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h
index 296f4e46e..69400d005 100644
--- a/src/theory/strings/theory_strings_utils.h
+++ b/src/theory/strings/theory_strings_utils.h
@@ -32,7 +32,13 @@ namespace utils {
  * Make the conjunction of nodes in a. Removes duplicate conjuncts, returns
  * true if a is empty, and a single literal if a has size 1.
  */
-Node mkAnd(std::vector<Node>& a);
+Node mkAnd(const std::vector<Node>& a);
+
+/**
+ * Adds all (non-duplicate) children of <k> applications from n to conj. For
+ * example, given (<k> (<k> A B) C A), we add { A, B, C } to conj.
+ */
+void flattenOp(Kind k, Node n, std::vector<Node>& conj);
 
 /**
  * Gets the "vector form" of term n, adds it to c.