Refactor normal forms in strings (#2897)
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>
Tue, 23 Apr 2019 18:40:27 +0000 (13:40 -0500)
committerGitHub <noreply@github.com>
Tue, 23 Apr 2019 18:40:27 +0000 (13:40 -0500)
src/CMakeLists.txt
src/theory/strings/normal_form.cpp [new file with mode: 0644]
src/theory/strings/normal_form.h [new file with mode: 0644]
src/theory/strings/theory_strings.cpp
src/theory/strings/theory_strings.h

index 372888d3665c006e4354329688ae8e77b145eb9d..cff31dbcdefda37c06d6240c46aa44d8a73a6652 100644 (file)
@@ -625,6 +625,8 @@ libcvc4_add_sources(
   theory/shared_terms_database.h
   theory/sort_inference.cpp
   theory/sort_inference.h
+  theory/strings/normal_form.cpp
+  theory/strings/normal_form.h
   theory/strings/regexp_elim.cpp
   theory/strings/regexp_elim.h
   theory/strings/regexp_operation.cpp
diff --git a/src/theory/strings/normal_form.cpp b/src/theory/strings/normal_form.cpp
new file mode 100644 (file)
index 0000000..c70845d
--- /dev/null
@@ -0,0 +1,163 @@
+/*********************                                                        */
+/*! \file normal_form.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of normal form data structure for the theory of
+ **  strings.
+ **/
+
+#include "theory/strings/normal_form.h"
+
+#include "options/strings_options.h"
+#include "theory/rewriter.h"
+
+using namespace std;
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+void NormalForm::init(Node base)
+{
+  Assert(base.getType().isString());
+  Assert(base.getKind() != STRING_CONCAT);
+  d_base = base;
+  d_nf.clear();
+  d_isRev = false;
+  d_exp.clear();
+  d_expDep.clear();
+
+  // add to normal form
+  if (!base.isConst() || !base.getConst<String>().isEmptyString())
+  {
+    d_nf.push_back(base);
+  }
+}
+
+void NormalForm::reverse()
+{
+  std::reverse(d_nf.begin(), d_nf.end());
+  d_isRev = !d_isRev;
+}
+
+void NormalForm::splitConstant(unsigned index, Node c1, Node c2)
+{
+  Assert(Rewriter::rewrite(NodeManager::currentNM()->mkNode(
+             STRING_CONCAT, d_isRev ? c2 : c1, d_isRev ? c1 : c2))
+         == d_nf[index]);
+  d_nf.insert(d_nf.begin() + index + 1, c2);
+  d_nf[index] = c1;
+  // update the dependency indices
+  // notice this is not critical for soundness: not doing the below incrementing
+  // will only lead to overapproximating when antecedants are required in
+  // explanations
+  for (const std::pair<Node, std::map<bool, unsigned> >& pe : d_expDep)
+  {
+    for (const std::pair<bool, unsigned>& pep : pe.second)
+    {
+      // See if this can be incremented: it can if this literal is not relevant
+      // to the current index, and hence it is not relevant for both c1 and c2.
+      Assert(pep.second >= 0 && pep.second <= d_nf.size());
+      bool increment = (pep.first == d_isRev)
+                           ? pep.second > index
+                           : (d_nf.size() - 1 - pep.second) < index;
+      if (increment)
+      {
+        d_expDep[pe.first][pep.first] = pep.second + 1;
+      }
+    }
+  }
+}
+
+void NormalForm::addToExplanation(Node exp,
+                                  unsigned new_val,
+                                  unsigned new_rev_val)
+{
+  if (std::find(d_exp.begin(), d_exp.end(), exp) == d_exp.end())
+  {
+    d_exp.push_back(exp);
+  }
+  for (unsigned k = 0; k < 2; k++)
+  {
+    unsigned val = k == 0 ? new_val : new_rev_val;
+    std::map<bool, unsigned>::iterator itned = d_expDep[exp].find(k == 1);
+    if (itned == d_expDep[exp].end())
+    {
+      Trace("strings-process-debug")
+          << "Deps : set dependency on " << exp << " to " << val
+          << " isRev=" << (k == 0) << std::endl;
+      d_expDep[exp][k == 1] = val;
+    }
+    else
+    {
+      Trace("strings-process-debug")
+          << "Deps : Multiple dependencies on " << exp << " : " << itned->second
+          << " " << val << " isRev=" << (k == 0) << std::endl;
+      // if we already have a dependency (in the case of non-linear string
+      // equalities), it is min/max
+      bool cmp = val > itned->second;
+      if (cmp == (k == 1))
+      {
+        d_expDep[exp][k == 1] = val;
+      }
+    }
+  }
+}
+
+void NormalForm::getExplanation(int index, std::vector<Node>& curr_exp)
+{
+  if (index == -1 || !options::stringMinPrefixExplain())
+  {
+    curr_exp.insert(curr_exp.end(), d_exp.begin(), d_exp.end());
+    return;
+  }
+  for (const Node& exp : d_exp)
+  {
+    int dep = static_cast<int>(d_expDep[exp][d_isRev]);
+    if (dep <= index)
+    {
+      curr_exp.push_back(exp);
+      Trace("strings-explain-prefix-debug") << "  include : ";
+    }
+    else
+    {
+      Trace("strings-explain-prefix-debug") << "  exclude : ";
+    }
+    Trace("strings-explain-prefix-debug") << exp << std::endl;
+  }
+}
+
+void NormalForm::getExplanationForPrefixEq(NormalForm& nfi,
+                                           NormalForm& nfj,
+                                           int index_i,
+                                           int index_j,
+                                           std::vector<Node>& curr_exp)
+{
+  Assert(nfi.d_isRev == nfj.d_isRev);
+  Trace("strings-explain-prefix")
+      << "Get explanation for prefix " << index_i << ", " << index_j
+      << ", reverse = " << nfi.d_isRev << std::endl;
+  // get explanations
+  nfi.getExplanation(index_i, curr_exp);
+  nfj.getExplanation(index_j, curr_exp);
+  Trace("strings-explain-prefix")
+      << "Included " << curr_exp.size() << " / "
+      << (nfi.d_exp.size() + nfj.d_exp.size()) << std::endl;
+  if (nfi.d_base != nfj.d_base)
+  {
+    Node eq = nfi.d_base.eqNode(nfj.d_base);
+    curr_exp.push_back(eq);
+  }
+}
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
diff --git a/src/theory/strings/normal_form.h b/src/theory/strings/normal_form.h
new file mode 100644 (file)
index 0000000..73036e6
--- /dev/null
@@ -0,0 +1,159 @@
+/*********************                                                        */
+/*! \file normal_form.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Normal form datastructure for the theory of strings.
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef __CVC4__THEORY__STRINGS__NORMAL_FORM_H
+#define __CVC4__THEORY__STRINGS__NORMAL_FORM_H
+
+#include <map>
+#include <vector>
+#include "expr/node.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+/** normal forms
+ *
+ * Stores information regarding the "normal form" of terms t in the current
+ * context. Normal forms can be associated with terms, or with string
+ * equivalence classes. For the latter, the normal form of an equivalence class
+ * exists if exactly one unique normal form is associated to a subset of its
+ * terms.
+ *
+ * In the following we use example where assertions are:
+ *   { x = y, y = z, y = u ++ v, u = u1 ++ u2 }
+ * and equivalence class [x] = { x, y, z, u ++ v }, whose normal form is
+ *   (u1, u2, v)
+ */
+class NormalForm
+{
+ public:
+  NormalForm() : d_isRev(false) {}
+  /**
+   * The "base" of the normal form. This is some term in the equivalence
+   * class of t that the normal form is based on. This is an arbitrary term
+   * which is used as the reference point for explanations. In the above
+   * running example, let us assume the base of [x] is y.
+   */
+  Node d_base;
+  /** the normal form, (u1, u2, v), in the above example */
+  std::vector<Node> d_nf;
+  /** is the normal form d_nf stored in reverse order? */
+  bool d_isRev;
+  /**
+   * The explanation for the normal form, this is a set of literals such that
+   *   d_exp => d_base = d_nf
+   * In the above example, this is the set of equalities
+   *   { y = u ++ v, u = u1 ++ u2 }
+   * If u ++ v was chosen as the base, then the first literal could be omitted.
+   */
+  std::vector<Node> d_exp;
+  /**
+   * Map from literals in the vector d_exp to integers indicating indices in
+   * d_nf for which that literal L is relevant for explaining d_base = d_nf.
+   *
+   * In particular:
+   * - false maps to an (ideally maximal) index relative to the start of d_nf
+   * such that L is required for explaining why d_base has a prefix that
+   * includes the term at that index,
+   * - true maps to an (ideally maximal) index relative to the end of d_nf
+   * such that L is required for explaining why d_base has a suffix that
+   * includes the term at that index.
+   * We call these the forward and backwards dependency indices.
+   *
+   * In the above example:
+   *   y = u ++ v   : false -> 0, true -> 0
+   *   u = u1 ++ u2 : false -> 0, true -> 1
+   * When explaining y = u1 ++ u2 ++ v, the equality y = u ++ v is required
+   * for explaining any prefix/suffix of y and its normal form. More
+   * interestingly, the equality u = u1 ++ u2 is not required for explaining
+   * that v is a suffix of y, since its reverse index in this map is 1,
+   * indicating that "u2" is the first position in u1 ++ u2 ++ v that it is
+   * required for explaining.
+   *
+   * This information is used to minimize explanations when conflicts arise,
+   * thereby strengthening conflict clauses and lemmas.
+   *
+   * For example, say u ++ v = y = x = u ++ w and w and v are distinct
+   * constants, using this dependency information, we could construct a
+   * conflict:
+   *   x = y ^ y = u ++ v ^ x = u ++ w
+   * that does not include u = u1 ++ u2, because the conflict only pertains
+   * to the last position in the normal form of y.
+   */
+  std::map<Node, std::map<bool, unsigned> > d_expDep;
+  /** initialize
+   *
+   * Initialize the normal form with base node base. If base is not the empty
+   * string, then d_nf is set to the singleton list containing base, otherwise
+   * d_nf is empty.
+   */
+  void init(Node base);
+  /** reverse the content of normal form d_nf
+   *
+   * This operation is done in contexts where the normal form is being scanned
+   * in reverse order.
+   */
+  void reverse();
+  /** split constant
+   *
+   * Splits the constant in d_nf at index to constants c1 and c2.
+   *
+   * Notice this function depends on whether the normal form has been reversed
+   * d_isRev, as this impacts how the dependency indices are updated.
+   */
+  void splitConstant(unsigned index, Node c1, Node c2);
+  /** add to explanation
+   *
+   * This adds exp to the explanation vector d_exp with new forward and
+   * backwards dependency indices new_val and new_rev_val.
+   *
+   * If exp already has dependencies, we update the forward dependency
+   * index to the minimum of the previous value and the new value, and
+   * similarly update the backwards dependency index to the maximum.
+   */
+  void addToExplanation(Node exp, unsigned new_val, unsigned new_rev_val);
+  /** get explanation
+   *
+   * This gets the explanation for the prefix (resp. suffix) of the normal
+   * form up to index when d_isRev is false (resp. true). In particular;
+   *
+   * If index is -1, then this method adds all literals in d_exp to curr_exp.
+   *
+   * If index>=0, this method adds all literals in d_exp to curr_exp whose
+   * forward (resp. backwards) dependency index is less than index
+   * when isRev is false (resp. true).
+   */
+  void getExplanation(int index, std::vector<Node>& curr_exp);
+  /** get explanation for prefix equality
+   *
+   * This adds to curr_exp the reason why the prefix of nfi up to index index_i
+   * is equivalent to the prefix of nfj up to index_j. The values of
+   * nfi.d_isRev and nfj.d_isRev affect how dependency indices are updated
+   * during this call.
+   */
+  static void getExplanationForPrefixEq(NormalForm& nfi,
+                                        NormalForm& nfj,
+                                        int index_i,
+                                        int index_j,
+                                        std::vector<Node>& curr_exp);
+};
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
+
+#endif /* __CVC4__THEORY__STRINGS__NORMAL_FORM_H */
index 8731bd1a5cc05d13b8574946fc952216de2b105c..c2b87fbef231af32fd07d10ba618ca94c7ef655c 100644 (file)
@@ -253,13 +253,13 @@ Node TheoryStrings::getNormalString(Node x, std::vector<Node>& nf_exp)
   if (!x.isConst())
   {
     Node xr = getRepresentative(x);
-    if (d_normal_forms.find(xr) != d_normal_forms.end())
+    std::map<Node, NormalForm>::iterator it = d_normal_form.find(xr);
+    if (it != d_normal_form.end())
     {
-      Node ret = mkConcat(d_normal_forms[xr]);
-      nf_exp.insert(nf_exp.end(),
-                    d_normal_forms_exp[xr].begin(),
-                    d_normal_forms_exp[xr].end());
-      addToExplanation(x, d_normal_forms_base[xr], nf_exp);
+      NormalForm& nf = it->second;
+      Node ret = mkConcat(nf.d_nf);
+      nf_exp.insert(nf_exp.end(), nf.d_exp.begin(), nf.d_exp.end());
+      addToExplanation(x, nf.d_base, nf_exp);
       Trace("strings-debug")
           << "Term: " << x << " has a normal form " << ret << std::endl;
       return ret;
@@ -400,11 +400,14 @@ bool TheoryStrings::getCurrentSubstitution( int effort, std::vector< Node >& var
         }
       }else if( effort>=1 && effort<3 && n.getType().isString() ){
         //normal forms
-        Node ns = getNormalString( d_normal_forms_base[nr], exp[n] );
+        NormalForm& nfnr = getNormalForm(nr);
+        Node ns = getNormalString(nfnr.d_base, exp[n]);
         subs.push_back( ns );
-        Trace("strings-subs") << "   normal eqc : " << ns << " " << d_normal_forms_base[nr] << " " << nr << std::endl;
-        if( !d_normal_forms_base[nr].isNull() ) {
-          addToExplanation( n, d_normal_forms_base[nr], exp[n] );
+        Trace("strings-subs") << "   normal eqc : " << ns << " " << nfnr.d_base
+                              << " " << nr << std::endl;
+        if (!nfnr.d_base.isNull())
+        {
+          addToExplanation(n, nfnr.d_base, exp[n]);
         }
       }else{
         //representative?
@@ -671,8 +674,8 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
       //check if col[i][j] has only variables
       if (!eqc.isConst())
       {
-        Assert(d_normal_forms.find(eqc) != d_normal_forms.end());
-        if (d_normal_forms[eqc].size() == 1)
+        NormalForm& nfe = getNormalForm(eqc);
+        if (nfe.d_nf.size() == 1)
         {
           // does it have a code and the length of these equivalence classes are
           // one?
@@ -795,20 +798,31 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
   //step 4 : assign constants to all other equivalence classes
   for( unsigned i=0; i<nodes.size(); i++ ){
     if( processed.find( nodes[i] )==processed.end() ){
-      Assert( d_normal_forms.find( nodes[i] )!=d_normal_forms.end() );
-      Trace("strings-model") << "Construct model for " << nodes[i] << " based on normal form ";
-      for( unsigned j=0; j<d_normal_forms[nodes[i]].size(); j++ ) {
-        if( j>0 ) Trace("strings-model") << " ++ ";
-        Trace("strings-model") << d_normal_forms[nodes[i]][j];
-        Node r = getRepresentative( d_normal_forms[nodes[i]][j] );
-        if( !r.isConst() && processed.find( r )==processed.end() ){
-          Trace("strings-model") << "(UNPROCESSED)";
+      NormalForm& nf = getNormalForm(nodes[i]);
+      if (Trace.isOn("strings-model"))
+      {
+        Trace("strings-model")
+            << "Construct model for " << nodes[i] << " based on normal form ";
+        for (unsigned j = 0, size = nf.d_nf.size(); j < size; j++)
+        {
+          Node n = nf.d_nf[j];
+          if (j > 0)
+          {
+            Trace("strings-model") << " ++ ";
+          }
+          Trace("strings-model") << n;
+          Node r = getRepresentative(n);
+          if (!r.isConst() && processed.find(r) == processed.end())
+          {
+            Trace("strings-model") << "(UNPROCESSED)";
+          }
         }
       }
       Trace("strings-model") << std::endl;
       std::vector< Node > nc;
-      for( unsigned j=0; j<d_normal_forms[nodes[i]].size(); j++ ) {
-        Node r = getRepresentative( d_normal_forms[nodes[i]][j] );
+      for (const Node& n : nf.d_nf)
+      {
+        Node r = getRepresentative(n);
         Assert( r.isConst() || processed.find( r )!=processed.end() );
         nc.push_back(r.isConst() ? r : processed[r]);
       }
@@ -2475,8 +2489,7 @@ void TheoryStrings::checkNormalFormsEq()
   }
   // calculate normal forms for each equivalence class, possibly adding
   // splitting lemmas
-  d_normal_forms.clear();
-  d_normal_forms_exp.clear();
+  d_normal_form.clear();
   std::map<Node, Node> nf_to_eqc;
   std::map<Node, Node> eqc_to_nf;
   std::map<Node, Node> eqc_to_exp;
@@ -2490,16 +2503,17 @@ void TheoryStrings::checkNormalFormsEq()
     {
       return;
     }
-    Node nf_term = mkConcat(d_normal_forms[eqc]);
+    NormalForm& nfe = getNormalForm(eqc);
+    Node nf_term = mkConcat(nfe.d_nf);
     std::map<Node, Node>::iterator itn = nf_to_eqc.find(nf_term);
     if (itn != nf_to_eqc.end())
     {
+      NormalForm& nfe_eq = getNormalForm(itn->second);
       // two equivalence classes have same normal form, merge
       std::vector<Node> nf_exp;
-      nf_exp.push_back(mkAnd(d_normal_forms_exp[eqc]));
+      nf_exp.push_back(mkAnd(nfe.d_exp));
       nf_exp.push_back(eqc_to_exp[itn->second]);
-      Node eq =
-          d_normal_forms_base[eqc].eqNode(d_normal_forms_base[itn->second]);
+      Node eq = nfe.d_base.eqNode(nfe_eq.d_base);
       sendInference(nf_exp, eq, "Normal_Form");
       if( hasProcessed() ){
         return;
@@ -2509,7 +2523,7 @@ void TheoryStrings::checkNormalFormsEq()
     {
       nf_to_eqc[nf_term] = eqc;
       eqc_to_nf[eqc] = nf_term;
-      eqc_to_exp[eqc] = mkAnd(d_normal_forms_exp[eqc]);
+      eqc_to_exp[eqc] = mkAnd(nfe.d_exp);
     }
     Trace("strings-process-debug")
         << "Done verifying normal forms are the same for " << eqc << std::endl;
@@ -2521,8 +2535,8 @@ void TheoryStrings::checkNormalFormsEq()
          it != eqc_to_exp.end();
          ++it)
     {
-      Trace("strings-nf") << "  N[" << it->first << "] (base "
-                          << d_normal_forms_base[it->first]
+      NormalForm& nf = getNormalForm(it->first);
+      Trace("strings-nf") << "  N[" << it->first << "] (base " << nf.d_base
                           << ") = " << eqc_to_nf[it->first] << std::endl;
       Trace("strings-nf") << "     exp: " << it->second << std::endl;
     }
@@ -2545,9 +2559,10 @@ void TheoryStrings::checkCodes()
     std::vector<Node> const_codes;
     for (const Node& eqc : d_strings_eqc)
     {
-      if (d_normal_forms[eqc].size() == 1 && d_normal_forms[eqc][0].isConst())
+      NormalForm& nfe = getNormalForm(eqc);
+      if (nfe.d_nf.size() == 1 && nfe.d_nf[0].isConst())
       {
-        Node c = d_normal_forms[eqc][0];
+        Node c = nfe.d_nf[0];
         Trace("strings-code-debug") << "Get proxy variable for " << c
                                     << std::endl;
         Node cc = nm->mkNode(kind::STRING_CODE, c);
@@ -2616,83 +2631,65 @@ void TheoryStrings::normalizeEquivalenceClass( Node eqc ) {
 #endif
     //do nothing
     Trace("strings-process-debug") << "Return process equivalence class " << eqc << " : empty." << std::endl;
-    d_normal_forms_base[eqc] = d_emptyString;
-    d_normal_forms[eqc].clear();
-    d_normal_forms_exp[eqc].clear();
+    d_normal_form[eqc].init(d_emptyString);
   } else {
-    Assert( d_normal_forms.find(eqc)==d_normal_forms.end() );
-    //phi => t = s1 * ... * sn
-    // normal form for each non-variable term in this eqc (s1...sn)
-    std::vector< std::vector< Node > > normal_forms;
-    // explanation for each normal form (phi)
-    std::vector< std::vector< Node > > normal_forms_exp;
-    // dependency information 
-    std::vector< std::map< Node, std::map< bool, int > > > normal_forms_exp_depend;
-    // record terms for each normal form (t)
-    std::vector< Node > normal_form_src;
-    // get normal forms
-    getNormalForms(eqc, normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend);
+    // should not have computed the normal form of this equivalence class yet
+    Assert(d_normal_form.find(eqc) == d_normal_form.end());
+    // Normal forms for the relevant terms in the equivalence class of eqc
+    std::vector<NormalForm> normal_forms;
+    // map each term to its index in the above vector
+    std::map<Node, unsigned> term_to_nf_index;
+    // get the normal forms
+    getNormalForms(eqc, normal_forms, term_to_nf_index);
     if( hasProcessed() ){
       return;
     }
     // process the normal forms
-    processNEqc( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend );
+    processNEqc(normal_forms);
     if( hasProcessed() ){
       return;
     }
-    //debugPrintNormalForms( "strings-solve", eqc, normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend );
-    
+    // debugPrintNormalForms( "strings-solve", eqc, normal_forms );
+
     //construct the normal form
     Assert( !normal_forms.empty() );
-
-    int nf_index = 0;
-    std::vector< Node >::iterator itn = std::find( normal_form_src.begin(), normal_form_src.end(), eqc );
-    if( itn!=normal_form_src.end() ){
-      nf_index = itn - normal_form_src.begin();
-      Trace("strings-solve-debug2") << "take normal form " << nf_index << std::endl;
-      Assert( normal_form_src[nf_index]==eqc );
-    }else{
-      //just take the first normal form
-      Trace("strings-solve-debug2") << "take the first normal form" << std::endl;
-    }
-    d_normal_forms[eqc].insert( d_normal_forms[eqc].end(), normal_forms[nf_index].begin(), normal_forms[nf_index].end() );
-    d_normal_forms_exp[eqc].insert( d_normal_forms_exp[eqc].end(), normal_forms_exp[nf_index].begin(), normal_forms_exp[nf_index].end() );
-    Trace("strings-solve-debug2") << "take normal form ... done" << std::endl;
-    d_normal_forms_base[eqc] = normal_form_src[nf_index];
-    //track dependencies 
-    for( unsigned i=0; i<normal_forms_exp[nf_index].size(); i++ ){
-      Node exp = normal_forms_exp[nf_index][i];
-      for( unsigned r=0; r<2; r++ ){
-        d_normal_forms_exp_depend[eqc][exp][r==0] = normal_forms_exp_depend[nf_index][exp][r==0];
-      }
+    unsigned nf_index = 0;
+    std::map<Node, unsigned>::iterator it = term_to_nf_index.find(eqc);
+    // we prefer taking the normal form whose base is the equivalence
+    // class representative, since this leads to shorter explanations in
+    // some cases.
+    if (it != term_to_nf_index.end())
+    {
+      nf_index = it->second;
     }
-    Trace("strings-process-debug") << "Return process equivalence class " << eqc << " : returned, size = " << d_normal_forms[eqc].size() << std::endl;
+    d_normal_form[eqc] = normal_forms[nf_index];
+    Trace("strings-process-debug")
+        << "Return process equivalence class " << eqc
+        << " : returned, size = " << d_normal_form[eqc].d_nf.size()
+        << std::endl;
   }
 }
 
-void trackNfExpDependency( std::vector< Node >& nf_exp_n, std::map< Node, std::map< bool, int > >& nf_exp_depend_n, Node exp, int new_val, int new_rev_val ){
-  if( std::find( nf_exp_n.begin(), nf_exp_n.end(), exp )==nf_exp_n.end() ){
-    nf_exp_n.push_back( exp );
-  }
-  for( unsigned k=0; k<2; k++ ){
-    int val = k==0 ? new_val : new_rev_val;
-    std::map< bool, int >::iterator itned = nf_exp_depend_n[exp].find( k==1 );
-    if( itned==nf_exp_depend_n[exp].end() ){
-      Trace("strings-process-debug") << "Deps : set dependency on " << exp << " to " << val << " isRev=" << (k==0) << std::endl;
-      nf_exp_depend_n[exp][k==1] = val;
-    }else{
-      Trace("strings-process-debug") << "Deps : Multiple dependencies on " << exp << " : " << itned->second << " " << val << " isRev=" << (k==0) << std::endl;
-      //if we already have a dependency (in the case of non-linear string equalities), it is min/max
-      bool cmp = val > itned->second;
-      if( cmp==(k==1) ){
-        nf_exp_depend_n[exp][k==1] = val;
-      }
-    }
+NormalForm& TheoryStrings::getNormalForm(Node n)
+{
+  std::map<Node, NormalForm>::iterator itn = d_normal_form.find(n);
+  if (itn == d_normal_form.end())
+  {
+    Trace("strings-warn") << "WARNING: returning empty normal form for " << n
+                          << std::endl;
+    // Shouln't ask for normal forms of strings that weren't computed. This
+    // likely means that n is not a representative or not a term in the current
+    // context. We simply return a default normal form here in this case.
+    Assert(false);
+    return d_normal_form[n];
   }
+  return itn->second;
 }
 
-void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
-                                    std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend ) {
+void TheoryStrings::getNormalForms(Node eqc,
+                                   std::vector<NormalForm>& normal_forms,
+                                   std::map<Node, unsigned>& term_to_nf_index)
+{
   //constant for equivalence class
   Node eqc_non_c = eqc;
   Trace("strings-process-debug") << "Get normal forms " << eqc << std::endl;
@@ -2700,79 +2697,115 @@ void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node >
   while( !eqc_i.isFinished() ){
     Node n = (*eqc_i);
     if( d_congruent.find( n )==d_congruent.end() ){
-      if( n.getKind() == kind::CONST_STRING || n.getKind() == kind::STRING_CONCAT ){
+      if (n.getKind() == CONST_STRING || n.getKind() == STRING_CONCAT)
+      {
         Trace("strings-process-debug") << "Get Normal Form : Process term " << n << " in eqc " << eqc << std::endl;
-        std::vector< Node > nf_n;
-        std::vector< Node > nf_exp_n;
-        std::map< Node, std::map< bool, int > > nf_exp_depend_n;
-        if( n.getKind()==kind::CONST_STRING ){
-          if( n!=d_emptyString ) {
-            nf_n.push_back( n );
-          }
-        }else if( n.getKind()==kind::STRING_CONCAT ){
+        NormalForm nf_curr;
+        if (n.getKind() == CONST_STRING)
+        {
+          nf_curr.init(n);
+        }
+        else if (n.getKind() == STRING_CONCAT)
+        {
+          // set the base to n, we construct the other portions of nf_curr in
+          // the following.
+          nf_curr.d_base = n;
           for( unsigned i=0; i<n.getNumChildren(); i++ ) {
             Node nr = d_equalityEngine.getRepresentative( n[i] );
+            // get the normal form for the component
+            NormalForm& nfr = getNormalForm(nr);
+            std::vector<Node>& nfrv = nfr.d_nf;
             Trace("strings-process-debug") << "Normalizing subterm " << n[i] << " = "  << nr << std::endl;
-            Assert( d_normal_forms.find( nr )!=d_normal_forms.end() );
-            unsigned orig_size = nf_n.size();
-            unsigned add_size = d_normal_forms[nr].size();
+            unsigned orig_size = nf_curr.d_nf.size();
+            unsigned add_size = nfrv.size();
             //if not the empty string, add to current normal form
-            if( !d_normal_forms[nr].empty() ){
-              for( unsigned r=0; r<d_normal_forms[nr].size(); r++ ) {
-                if( Trace.isOn("strings-error") ) {
-                  if( d_normal_forms[nr][r].getKind()==kind::STRING_CONCAT ){
-                    Trace("strings-error") << "Strings::Error: From eqc = " << eqc << ", " << n << " index " << i << ", bad normal form : ";
-                    for( unsigned rr=0; rr<d_normal_forms[nr].size(); rr++ ) {
-                      Trace("strings-error") << d_normal_forms[nr][rr] << " ";
+            if (!nfrv.empty())
+            {
+              // if in a build with assertions, we run the following block,
+              // which checks that normal forms do not have concat terms.
+              if (Configuration::isAssertionBuild())
+              {
+                for (const Node& nn : nfrv)
+                {
+                  if (Trace.isOn("strings-error"))
+                  {
+                    if (nn.getKind() == STRING_CONCAT)
+                    {
+                      Trace("strings-error")
+                          << "Strings::Error: From eqc = " << eqc << ", " << n
+                          << " index " << i << ", bad normal form : ";
+                      for (unsigned rr = 0; rr < nfrv.size(); rr++)
+                      {
+                        Trace("strings-error") << nfrv[rr] << " ";
+                      }
+                      Trace("strings-error") << std::endl;
                     }
-                    Trace("strings-error") << std::endl;
                   }
+                  Assert(nn.getKind() != kind::STRING_CONCAT);
                 }
-                Assert( d_normal_forms[nr][r].getKind()!=kind::STRING_CONCAT );
               }
-              nf_n.insert( nf_n.end(), d_normal_forms[nr].begin(), d_normal_forms[nr].end() );
+              nf_curr.d_nf.insert(nf_curr.d_nf.end(), nfrv.begin(), nfrv.end());
             }
-
-            for( unsigned j=0; j<d_normal_forms_exp[nr].size(); j++ ){
-              Node exp = d_normal_forms_exp[nr][j];
-              //track depends
-              trackNfExpDependency( nf_exp_n, nf_exp_depend_n, exp,
-                                    orig_size + d_normal_forms_exp_depend[nr][exp][false], 
-                                    orig_size + ( add_size - d_normal_forms_exp_depend[nr][exp][true] ) );
+            // Track explanation for the normal form. This is in two parts.
+            // First, we must carry the explanation of the normal form computed
+            // for the representative nr.
+            for (const Node& exp : nfr.d_exp)
+            {
+              // The explanation is only relevant for the subsegment it was
+              // previously relevant for, shifted now based on its relative
+              // placement in the normal form of n.
+              nf_curr.addToExplanation(
+                  exp,
+                  orig_size + nfr.d_expDep[exp][false],
+                  orig_size + (add_size - nfr.d_expDep[exp][true]));
             }
-            if( d_normal_forms_base[nr]!=n[i] ){
-              Assert( d_normal_forms_base.find( nr )!=d_normal_forms_base.end() );
-              Node eq = n[i].eqNode( d_normal_forms_base[nr] );
-              //track depends : entire current segment is dependent upon base equality
-              trackNfExpDependency( nf_exp_n, nf_exp_depend_n, eq, orig_size, orig_size + add_size );
+            // Second, must explain that the component n[i] is equal to the
+            // base of the normal form for nr.
+            Node base = nfr.d_base;
+            if (base != n[i])
+            {
+              Node eq = n[i].eqNode(base);
+              // The equality is relevant for the entire current segment
+              nf_curr.addToExplanation(eq, orig_size, orig_size + add_size);
             }
           }
-          //convert forward indices to reverse indices
-          int total_size = nf_n.size();
-          for( std::map< Node, std::map< bool, int > >::iterator it = nf_exp_depend_n.begin(); it != nf_exp_depend_n.end(); ++it ){
-            it->second[true] = total_size - it->second[true];
-            Assert( it->second[true]>=0 );
+          // Now that we are finished with the loop, we convert forward indices
+          // to reverse indices in the explanation dependency information
+          int total_size = nf_curr.d_nf.size();
+          for (std::pair<const Node, std::map<bool, unsigned> >& ed :
+               nf_curr.d_expDep)
+          {
+            ed.second[true] = total_size - ed.second[true];
+            Assert(ed.second[true] >= 0);
           }
         }
         //if not equal to self
-        if( nf_n.size()>1 || ( nf_n.size()==1 && nf_n[0].getKind()==kind::CONST_STRING ) ){
-          if( nf_n.size()>1 ) {
-            for( unsigned i=0; i<nf_n.size(); i++ ){
-              if( Trace.isOn("strings-error") ){
-                Trace("strings-error") << "Cycle for normal form ";
-                printConcat(nf_n,"strings-error");
-                Trace("strings-error") << "..." << nf_n[i] << std::endl;
+        std::vector<Node>& currv = nf_curr.d_nf;
+        if (currv.size() > 1
+            || (currv.size() == 1 && currv[0].getKind() == CONST_STRING))
+        {
+          // if in a build with assertions, check that normal form is acyclic
+          if (Configuration::isAssertionBuild())
+          {
+            if (currv.size() > 1)
+            {
+              for (unsigned i = 0; i < currv.size(); i++)
+              {
+                if (Trace.isOn("strings-error"))
+                {
+                  Trace("strings-error") << "Cycle for normal form ";
+                  printConcat(currv, "strings-error");
+                  Trace("strings-error") << "..." << currv[i] << std::endl;
+                }
+                Assert(!areEqual(currv[i], n));
               }
-              Assert( !areEqual( nf_n[i], n ) );
             }
           }
-          normal_forms.push_back(nf_n);
-          normal_form_src.push_back(n);
-          normal_forms_exp.push_back(nf_exp_n);
-          normal_forms_exp_depend.push_back(nf_exp_depend_n);
+          term_to_nf_index[n] = normal_forms.size();
+          normal_forms.push_back(nf_curr);
         }else{
           //this was redundant: combination of self + empty string(s)
-          Node nn = nf_n.size()==0 ? d_emptyString : nf_n[0];
+          Node nn = currv.size() == 0 ? d_emptyString : currv[0];
           Assert( areEqual( nn, eqc ) );
         }
       }else{
@@ -2784,41 +2817,56 @@ void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node >
 
   if( normal_forms.empty() ) {
     Trace("strings-solve-debug2") << "construct the normal form" << std::endl;
-    //do not choose a concat here use "eqc_non_c" (in this case they have non-trivial explanation why they normalize to self)
-    std::vector< Node > eqc_non_c_nf;
-    getConcatVec( eqc_non_c, eqc_non_c_nf );
-    normal_forms.push_back( eqc_non_c_nf );
-    normal_form_src.push_back( eqc_non_c );
-    normal_forms_exp.push_back( std::vector< Node >() );
-    normal_forms_exp_depend.push_back( std::map< Node, std::map< bool, int > >() );
+    // This case happens when there are no non-trivial normal forms for this
+    // equivalence class. For example, given assertions:
+    //   { x = y ++ z, x = y, z = "" }
+    // The equivalence class of { x, y, y ++ z } is such that the normal form
+    // of all terms is a variable (either x or y) in the equivalence class
+    // itself. Thus, the normal form of this equivalence class can be assigned
+    // to one of these variables.
+    // We use a non-concatenation term among the terms in this equivalence
+    // class, which is stored in eqc_non_c. The reason is this does not require
+    // an explanation, whereas e.g. y ++ z would require the explanation z = ""
+    // to justify its normal form is y.
+    Assert(eqc_non_c.getKind() != STRING_CONCAT);
+    NormalForm nf_triv;
+    nf_triv.init(eqc_non_c);
+    normal_forms.push_back(nf_triv);
   }else{
     if(Trace.isOn("strings-solve")) {
       Trace("strings-solve") << "--- Normal forms for equivalance class " << eqc << " : " << std::endl;
-      for( unsigned i=0; i<normal_forms.size(); i++ ) {
-        Trace("strings-solve") << "#" << i << " (from " << normal_form_src[i] << ") : ";
-        for( unsigned j=0; j<normal_forms[i].size(); j++ ) {
+      for (unsigned i = 0, size = normal_forms.size(); i < size; i++)
+      {
+        NormalForm& nf = normal_forms[i];
+        Trace("strings-solve") << "#" << i << " (from " << nf.d_base << ") : ";
+        for (unsigned j = 0, sizej = nf.d_nf.size(); j < sizej; j++)
+        {
           if(j>0) {
             Trace("strings-solve") << ", ";
           }
-          Trace("strings-solve") << normal_forms[i][j];
+          Trace("strings-solve") << nf.d_nf[j];
         }
         Trace("strings-solve") << std::endl;
         Trace("strings-solve") << "   Explanation is : ";
-        if(normal_forms_exp[i].size() == 0) {
+        if (nf.d_exp.size() == 0)
+        {
           Trace("strings-solve") << "NONE";
         } else {
-          for( unsigned j=0; j<normal_forms_exp[i].size(); j++ ) {
+          for (unsigned j = 0, sizej = nf.d_exp.size(); j < sizej; j++)
+          {
             if(j>0) {
               Trace("strings-solve") << " AND ";
             }
-            Trace("strings-solve") << normal_forms_exp[i][j];
+            Trace("strings-solve") << nf.d_exp[j];
           }
           Trace("strings-solve") << std::endl;
           Trace("strings-solve") << "WITH DEPENDENCIES : " << std::endl;
-          for( unsigned j=0; j<normal_forms_exp[i].size(); j++ ) {
-            Trace("strings-solve") << "   " << normal_forms_exp[i][j] << " -> ";
-            Trace("strings-solve") << normal_forms_exp_depend[i][normal_forms_exp[i][j]][false] << ",";
-            Trace("strings-solve") << normal_forms_exp_depend[i][normal_forms_exp[i][j]][true] << std::endl;
+          for (unsigned j = 0, sizej = nf.d_exp.size(); j < sizej; j++)
+          {
+            Node exp = nf.d_exp[j];
+            Trace("strings-solve") << "   " << exp << " -> ";
+            Trace("strings-solve") << nf.d_expDep[exp][false] << ",";
+            Trace("strings-solve") << nf.d_expDep[exp][true] << std::endl;
           }
         }
         Trace("strings-solve") << std::endl;
@@ -2832,10 +2880,14 @@ void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node >
     Node c = getConstantEqc( eqc );
     if( !c.isNull() ){
       Trace("strings-solve") << "Eqc is constant " << c << std::endl;
-      for( unsigned i=0; i<normal_forms.size(); i++ ) {
+      for (unsigned i = 0, size = normal_forms.size(); i < size; i++)
+      {
+        NormalForm& nf = normal_forms[i];
         int firstc, lastc;
-        if( !TheoryStringsRewriter::canConstantContainList( c, normal_forms[i], firstc, lastc ) ){
-          Node n = normal_form_src[i];
+        if (!TheoryStringsRewriter::canConstantContainList(
+                c, nf.d_nf, firstc, lastc))
+        {
+          Node n = nf.d_base;
           //conflict
           Trace("strings-solve") << "Normal form for " << n << " cannot be contained in constant " << c << std::endl;
           //conflict, explanation is n = base ^ base = c ^ relevant porition of ( n = N[n] )
@@ -2847,7 +2899,7 @@ void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node >
             exp.push_back( d_eqc_to_const_exp[eqc] );
           }
           //TODO: this can be minimized based on firstc/lastc, normal_forms_exp_depend
-          exp.insert( exp.end(), normal_forms_exp[i].begin(), normal_forms_exp[i].end() );
+          exp.insert(exp.end(), nf.d_exp.begin(), nf.d_exp.end());
           Node conc = d_false;
           sendInference( exp, conc, "N_NCTN" );
         }
@@ -2856,52 +2908,29 @@ void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node >
   }
 }
 
-void TheoryStrings::getExplanationVectorForPrefix( std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
-                                                   unsigned i, int index, bool isRev, std::vector< Node >& curr_exp ) {
-  if( index==-1 || !options::stringMinPrefixExplain() ){
-    curr_exp.insert(curr_exp.end(), normal_forms_exp[i].begin(), normal_forms_exp[i].end() );
-  }else{
-    for( unsigned k=0; k<normal_forms_exp[i].size(); k++ ){
-      Node exp = normal_forms_exp[i][k];
-      int dep = normal_forms_exp_depend[i][exp][isRev];
-      if( dep<=index ){
-        curr_exp.push_back( exp );
-        Trace("strings-explain-prefix-debug") << "  include : " << exp << std::endl;
-      }else{
-        Trace("strings-explain-prefix-debug") << "  exclude : " << exp << std::endl;
-      }
-    }
-  }
-}
-
-void TheoryStrings::getExplanationVectorForPrefixEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
-                                                     std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
-                                                     unsigned i, unsigned j, int index_i, int index_j, bool isRev, std::vector< Node >& curr_exp ) {
-  Trace("strings-explain-prefix") << "Get explanation for prefix " << index_i << ", " << index_j << " of normal forms " << i << " and " << j << ", reverse = " << isRev << std::endl;
-  for( unsigned r=0; r<2; r++ ){
-    getExplanationVectorForPrefix( normal_forms_exp, normal_forms_exp_depend, r==0 ? i : j, r==0 ? index_i : index_j, isRev, curr_exp );
-  }
-  Trace("strings-explain-prefix") << "Included " << curr_exp.size() << " / " << ( normal_forms_exp[i].size() + normal_forms_exp[j].size() ) << std::endl;
-  addToExplanation( normal_form_src[i], normal_form_src[j], curr_exp );
-}
-
-
-void TheoryStrings::processNEqc( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
-                                 std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend ){
+void TheoryStrings::processNEqc(std::vector<NormalForm>& normal_forms)
+{
   //the possible inferences
   std::vector< InferInfo > pinfer;
   // loop over all pairs 
   for(unsigned i=0; i<normal_forms.size()-1; i++) {
     //unify each normalform[j] with normal_forms[i]
     for(unsigned j=i+1; j<normal_forms.size(); j++ ) {
+      NormalForm& nfi = normal_forms[i];
+      NormalForm& nfj = normal_forms[j];
       //ensure that normal_forms[i] and normal_forms[j] are the same modulo equality, add to pinfer if not
       Trace("strings-solve") << "Strings: Process normal form #" << i << " against #" << j << "..." << std::endl;
-      if( isNormalFormPair( normal_form_src[i], normal_form_src[j] ) ) {
+      if (isNormalFormPair(nfi.d_base, nfj.d_base))
+      {
         Trace("strings-solve") << "Strings: Already cached." << std::endl;
       }else{
         //process the reverse direction first (check for easy conflicts and inferences)
         unsigned rindex = 0;
-        processReverseNEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, rindex, 0, pinfer );
+        nfi.reverse();
+        nfj.reverse();
+        processSimpleNEq(nfi, nfj, rindex, true, 0, pinfer);
+        nfi.reverse();
+        nfj.reverse();
         if( hasProcessed() ){
           return;
         }else if( !pinfer.empty() && pinfer.back().d_id==1 ){
@@ -2911,7 +2940,7 @@ void TheoryStrings::processNEqc( std::vector< std::vector< Node > > &normal_form
         //rindex = 0;
 
         unsigned index = 0;
-        processSimpleNEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, false, rindex, pinfer );
+        processSimpleNEq(nfi, nfj, index, false, rindex, pinfer);
         if( hasProcessed() ){
           return;
         }else if( !pinfer.empty() && pinfer.back().d_id==1 ){
@@ -2977,86 +3006,92 @@ bool TheoryStrings::InferInfo::sendAsLemma() {
   return true;
 }
 
-void TheoryStrings::processReverseNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
-                                       std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
-                                       unsigned i, unsigned j, unsigned& index, unsigned rproc, std::vector< InferInfo >& pinfer ) {
-  //reverse normal form of i, j
-  std::reverse( normal_forms[i].begin(), normal_forms[i].end() );
-  std::reverse( normal_forms[j].begin(), normal_forms[j].end() );
-
-  processSimpleNEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, true, rproc, pinfer );
-
-  //reverse normal form of i, j
-  std::reverse( normal_forms[i].begin(), normal_forms[i].end() );
-  std::reverse( normal_forms[j].begin(), normal_forms[j].end() );
-}
-
-//rproc is the # is the size of suffix that is identical
-void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, 
-                                      std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
-                                      unsigned i, unsigned j, unsigned& index, bool isRev, unsigned rproc, std::vector< InferInfo >& pinfer ) {
-  Assert( rproc<=normal_forms[i].size() && rproc<=normal_forms[j].size() );
+void TheoryStrings::processSimpleNEq(NormalForm& nfi,
+                                     NormalForm& nfj,
+                                     unsigned& index,
+                                     bool isRev,
+                                     unsigned rproc,
+                                     std::vector<InferInfo>& pinfer)
+{
+  std::vector<Node>& nfiv = nfi.d_nf;
+  std::vector<Node>& nfjv = nfj.d_nf;
+  NodeManager* nm = NodeManager::currentNM();
+  Assert(rproc <= nfiv.size() && rproc <= nfjv.size());
   bool success;
   do {
     success = false;
     //if we are at the end
-    if( index==(normal_forms[i].size()-rproc) || index==(normal_forms[j].size()-rproc) ){
-      if( index==(normal_forms[i].size()-rproc)  && index==(normal_forms[j].size()-rproc)  ){
+    if (index == (nfiv.size() - rproc) || index == (nfjv.size() - rproc))
+    {
+      if (index == (nfiv.size() - rproc) && index == (nfjv.size() - rproc))
+      {
         //we're done
       }else{
         //the remainder must be empty
-        unsigned k = index==(normal_forms[i].size()-rproc) ? j : i;
+        NormalForm& nfk = index == (nfiv.size() - rproc) ? nfj : nfi;
+        std::vector<Node>& nfkv = nfk.d_nf;
         unsigned index_k = index;
         //Node eq_exp = mkAnd( curr_exp );
         std::vector< Node > curr_exp;
-        getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, -1, -1, isRev, curr_exp );
-        while( !d_conflict && index_k<(normal_forms[k].size()-rproc) ){
+        NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, curr_exp);
+        while (!d_conflict && index_k < (nfkv.size() - rproc))
+        {
           //can infer that this string must be empty
-          Node eq = normal_forms[k][index_k].eqNode( d_emptyString );
+          Node eq = nfkv[index_k].eqNode(d_emptyString);
           //Trace("strings-lemma") << "Strings: Infer " << eq << " from " << eq_exp << std::endl;
-          Assert( !areEqual( d_emptyString, normal_forms[k][index_k] ) );
+          Assert(!areEqual(d_emptyString, nfkv[index_k]));
           sendInference( curr_exp, eq, "N_EndpointEmp" );
           index_k++;
         }
       }
     }else{
-      Trace("strings-solve-debug") << "Process " << normal_forms[i][index] << " ... " << normal_forms[j][index] << std::endl;
-      if( normal_forms[i][index]==normal_forms[j][index] ){
+      Trace("strings-solve-debug")
+          << "Process " << nfiv[index] << " ... " << nfjv[index] << std::endl;
+      if (nfiv[index] == nfjv[index])
+      {
         Trace("strings-solve-debug") << "Simple Case 1 : strings are equal" << std::endl;
         index++;
         success = true;
       }else{
-        Assert( !areEqual(normal_forms[i][index], normal_forms[j][index]) );
+        Assert(!areEqual(nfiv[index], nfjv[index]));
         std::vector< Node > temp_exp;
-        Node length_term_i = getLength( normal_forms[i][index], temp_exp );
-        Node length_term_j = getLength( normal_forms[j][index], temp_exp );
-        //check  length(normal_forms[i][index]) == length(normal_forms[j][index])
+        Node length_term_i = getLength(nfiv[index], temp_exp);
+        Node length_term_j = getLength(nfjv[index], temp_exp);
+        // check  length(nfiv[index]) == length(nfjv[index])
         if( areEqual( length_term_i, length_term_j ) ){
           Trace("strings-solve-debug") << "Simple Case 2 : string lengths are equal" << std::endl;
-          Node eq = normal_forms[i][index].eqNode( normal_forms[j][index] );
+          Node eq = nfiv[index].eqNode(nfjv[index]);
           //eq = Rewriter::rewrite( eq );
           Node length_eq = length_term_i.eqNode( length_term_j );
           //temp_exp.insert(temp_exp.end(), curr_exp.begin(), curr_exp.end() );
-          getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, index, isRev, temp_exp );
+          NormalForm::getExplanationForPrefixEq(
+              nfi, nfj, index, index, temp_exp);
           temp_exp.push_back(length_eq);
           sendInference( temp_exp, eq, "N_Unify" );
           return;
-        }else if( ( normal_forms[i][index].getKind()!=kind::CONST_STRING && index==normal_forms[i].size()-rproc-1 ) ||
-                  ( normal_forms[j][index].getKind()!=kind::CONST_STRING && index==normal_forms[j].size()-rproc-1 ) ){
+        }
+        else if ((nfiv[index].getKind() != CONST_STRING
+                  && index == nfiv.size() - rproc - 1)
+                 || (nfjv[index].getKind() != CONST_STRING
+                     && index == nfjv.size() - rproc - 1))
+        {
           Trace("strings-solve-debug") << "Simple Case 3 : at endpoint" << std::endl;
           std::vector< Node > antec;
           //antec.insert(antec.end(), curr_exp.begin(), curr_exp.end() );
-          getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, -1, -1, isRev, antec );
+          NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, antec);
           std::vector< Node > eqn;
           for( unsigned r=0; r<2; r++ ) {
-            int index_k = index;
-            int k = r==0 ? i : j;
+            NormalForm& nfk = r == 0 ? nfi : nfj;
+            std::vector<Node>& nfkv = nfk.d_nf;
             std::vector< Node > eqnc;
-            for( unsigned index_l=index_k; index_l<(normal_forms[k].size()-rproc); index_l++ ) {
+            for (unsigned index_l = index, size = (nfkv.size() - rproc);
+                 index_l < size;
+                 index_l++)
+            {
               if(isRev) {
-                eqnc.insert(eqnc.begin(), normal_forms[k][index_l] );
+                eqnc.insert(eqnc.begin(), nfkv[index_l]);
               } else {
-                eqnc.push_back( normal_forms[k][index_l] );
+                eqnc.push_back(nfkv[index_l]);
               }
             }
             eqn.push_back( mkConcat( eqnc ) );
@@ -3065,49 +3100,46 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal
             sendInference( antec, eqn[0].eqNode( eqn[1] ), "N_EndpointEq", true );
             return;
           }else{
-            Assert( normal_forms[i].size()==normal_forms[j].size() );
-            index = normal_forms[i].size()-rproc;
+            Assert(nfiv.size() == nfjv.size());
+            index = nfiv.size() - rproc;
           }
-        }else if( normal_forms[i][index].isConst() && normal_forms[j][index].isConst() ){
-          Node const_str = normal_forms[i][index];
-          Node other_str = normal_forms[j][index];
+        }
+        else if (nfiv[index].isConst() && nfjv[index].isConst())
+        {
+          Node const_str = nfiv[index];
+          Node other_str = nfjv[index];
           Trace("strings-solve-debug") << "Simple Case 3 : Const Split : " << const_str << " vs " << other_str << " at index " << index << ", isRev = " << isRev << std::endl;
           unsigned len_short = const_str.getConst<String>().size() <= other_str.getConst<String>().size() ? const_str.getConst<String>().size() : other_str.getConst<String>().size();
           bool isSameFix = isRev ? const_str.getConst<String>().rstrncmp(other_str.getConst<String>(), len_short): const_str.getConst<String>().strncmp(other_str.getConst<String>(), len_short);
           if( isSameFix ) {
             //same prefix/suffix
+            bool constCmp = const_str.getConst<String>().size()
+                            < other_str.getConst<String>().size();
             //k is the index of the string that is shorter
-            int k = const_str.getConst<String>().size()<other_str.getConst<String>().size() ? i : j;
-            int l = const_str.getConst<String>().size()<other_str.getConst<String>().size() ? j : i;
-            //update the nf exp dependencies
-            //notice this is not critical for soundness: not doing the below incrementing will only lead to overapproximating when antecedants are required in explanations
-            for( std::map< Node, std::map< bool, int > >::iterator itnd = normal_forms_exp_depend[l].begin(); itnd != normal_forms_exp_depend[l].end(); ++itnd ){
-              for( std::map< bool, int >::iterator itnd2 = itnd->second.begin(); itnd2 != itnd->second.end(); ++itnd2 ){
-                //see if this can be incremented: it can if it is not relevant to the current index
-                Assert( itnd2->second>=0 && itnd2->second<=(int)normal_forms[l].size() );
-                bool increment = (itnd2->first==isRev) ? itnd2->second>(int)index : ( (int)normal_forms[l].size()-1-itnd2->second )<(int)index;
-                if( increment ){
-                  normal_forms_exp_depend[l][itnd->first][itnd2->first] = itnd2->second + 1;
-                }
-              }
-            }
+            NormalForm& nfk = constCmp ? nfi : nfj;
+            std::vector<Node>& nfkv = nfk.d_nf;
+            NormalForm& nfl = constCmp ? nfj : nfi;
+            std::vector<Node>& nflv = nfl.d_nf;
+            Node remainderStr;
             if( isRev ){
-              int new_len = normal_forms[l][index].getConst<String>().size() - len_short;
-              Node remainderStr = NodeManager::currentNM()->mkConst( normal_forms[l][index].getConst<String>().substr(0, new_len) );
-              Trace("strings-solve-debug-test") << "Break normal form of " << normal_forms[l][index] << " into " << normal_forms[k][index] << ", " << remainderStr << std::endl;
-              normal_forms[l].insert( normal_forms[l].begin()+index + 1, remainderStr );
+              int new_len = nflv[index].getConst<String>().size() - len_short;
+              remainderStr = nm->mkConst(
+                  nflv[index].getConst<String>().substr(0, new_len));
             }else{
-              Node remainderStr = NodeManager::currentNM()->mkConst(normal_forms[l][index].getConst<String>().substr(len_short));
-              Trace("strings-solve-debug-test") << "Break normal form of " << normal_forms[l][index] << " into " << normal_forms[k][index] << ", " << remainderStr << std::endl;
-              normal_forms[l].insert( normal_forms[l].begin()+index + 1, remainderStr );
+              remainderStr =
+                  nm->mkConst(nflv[index].getConst<String>().substr(len_short));
             }
-            normal_forms[l][index] = normal_forms[k][index];
+            Trace("strings-solve-debug-test")
+                << "Break normal form of " << nflv[index] << " into "
+                << nfkv[index] << ", " << remainderStr << std::endl;
+            nfl.splitConstant(index, nfkv[index], remainderStr);
             index++;
             success = true;
           }else{
             //conflict
             std::vector< Node > antec;
-            getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, index, isRev, antec );
+            NormalForm::getExplanationForPrefixEq(
+                nfi, nfj, index, index, antec);
             sendInference( antec, d_false, "N_Const", true );
             return;
           }
@@ -3116,17 +3148,19 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal
           InferInfo info;
           info.d_index = index;
           //for debugging
-          info.d_i = i;
-          info.d_j = j;
+          info.d_i = nfi.d_base;
+          info.d_j = nfj.d_base;
           info.d_rev = isRev;
           bool info_valid = false;
-          Assert( index<normal_forms[i].size()-rproc && index<normal_forms[j].size()-rproc );
+          Assert(index < nfiv.size() - rproc && index < nfjv.size() - rproc);
           std::vector< Node > lexp;
-          Node length_term_i = getLength( normal_forms[i][index], lexp );
-          Node length_term_j = getLength( normal_forms[j][index], lexp );
+          Node length_term_i = getLength(nfiv[index], lexp);
+          Node length_term_j = getLength(nfjv[index], lexp);
           //split on equality between string lengths (note that splitting on equality between strings is worse since it is harder to process)
-          if( !areDisequal( length_term_i, length_term_j ) && !areEqual( length_term_i, length_term_j ) &&
-              normal_forms[i][index].getKind()!=kind::CONST_STRING && normal_forms[j][index].getKind()!=kind::CONST_STRING ){   //AJR: remove the latter 2 conditions?
+          if (!areDisequal(length_term_i, length_term_j)
+              && !areEqual(length_term_i, length_term_j)
+              && !nfiv[index].isConst() && !nfjv[index].isConst())
+          {  // AJR: remove the latter 2 conditions?
             Trace("strings-solve-debug") << "Non-simple Case 1 : string lengths neither equal nor disequal" << std::endl;
             //try to make the lengths equal via splitting on demand
             Node length_eq = NodeManager::currentNM()->mkNode( kind::EQUAL, length_term_i, length_term_j );
@@ -3141,33 +3175,34 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal
             int loop_in_i = -1;
             int loop_in_j = -1;
             ProcessLoopResult plr = ProcessLoopResult::SKIPPED;
-            if( detectLoop( normal_forms, i, j, index, loop_in_i, loop_in_j, rproc ) ){
+            if (detectLoop(nfi, nfj, index, loop_in_i, loop_in_j, rproc))
+            {
               if( !isRev ){  //FIXME
-              getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, -1, -1, isRev, info.d_ant );
-              //set info
-              plr = processLoop(normal_forms,
-                                normal_form_src,
-                                i,
-                                j,
-                                loop_in_i != -1 ? i : j,
-                                loop_in_i != -1 ? j : i,
-                                loop_in_i != -1 ? loop_in_i : loop_in_j,
-                                index,
-                                info);
-              if (plr == ProcessLoopResult::INFERENCE)
-              {
-                info_valid = true;
-              }
+                NormalForm::getExplanationForPrefixEq(
+                    nfi, nfj, -1, -1, info.d_ant);
+                // set info
+                plr = processLoop(loop_in_i != -1 ? nfi : nfj,
+                                  loop_in_i != -1 ? nfj : nfi,
+                                  loop_in_i != -1 ? loop_in_i : loop_in_j,
+                                  index,
+                                  info);
+                if (plr == ProcessLoopResult::INFERENCE)
+                {
+                  info_valid = true;
+                }
               }
             }
 
             if (plr == ProcessLoopResult::SKIPPED)
             {
               //AJR: length entailment here?
-              if( normal_forms[i][index].getKind() == kind::CONST_STRING || normal_forms[j][index].getKind() == kind::CONST_STRING ){
-                unsigned const_k = normal_forms[i][index].getKind() == kind::CONST_STRING ? i : j;
-                unsigned nconst_k = normal_forms[i][index].getKind() == kind::CONST_STRING ? j : i;
-                Node other_str = normal_forms[nconst_k][index];
+              if (nfiv[index].isConst() || nfjv[index].isConst())
+              {
+                NormalForm& nfc = nfiv[index].isConst() ? nfi : nfj;
+                std::vector<Node>& nfcv = nfc.d_nf;
+                NormalForm& nfnc = nfiv[index].isConst() ? nfj : nfi;
+                std::vector<Node>& nfncv = nfnc.d_nf;
+                Node other_str = nfncv[index];
                 Assert( other_str.getKind()!=kind::CONST_STRING, "Other string is not constant." );
                 Assert( other_str.getKind()!=kind::STRING_CONCAT, "Other string is not CONCAT." );
                 if( !d_equalityEngine.areDisequal( other_str, d_emptyString, true ) ){
@@ -3180,12 +3215,15 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal
                   if( !isRev ){  //FIXME
                   Node xnz = other_str.eqNode( d_emptyString ).negate();  
                   unsigned index_nc_k = index+1;
-                  //Node next_const_str = TheoryStringsRewriter::collectConstantStringAt( normal_forms[nconst_k], index_nc_k, false );
                   unsigned start_index_nc_k = index+1;
-                  Node next_const_str = TheoryStringsRewriter::getNextConstantAt( normal_forms[nconst_k], start_index_nc_k, index_nc_k, false );
+                  Node next_const_str =
+                      TheoryStringsRewriter::getNextConstantAt(
+                          nfncv, start_index_nc_k, index_nc_k, false);
                   if( !next_const_str.isNull() ) {         
                     unsigned index_c_k = index;
-                    Node const_str = TheoryStringsRewriter::collectConstantStringAt( normal_forms[const_k], index_c_k, false );
+                    Node const_str =
+                        TheoryStringsRewriter::collectConstantStringAt(
+                            nfcv, index_c_k, false);
                     Assert( !const_str.isNull() );
                     CVC4::String stra = const_str.getConst<String>();
                     CVC4::String strb = next_const_str.getConst<String>();
@@ -3208,9 +3246,9 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal
                     }
                     if( p>1 ){
                       if( start_index_nc_k==index+1 ){
-                        info.d_ant.push_back( xnz );            
-                        getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, 
-                                                         const_k, nconst_k, index_c_k, index_nc_k, isRev, info.d_ant );   
+                        info.d_ant.push_back(xnz);
+                        NormalForm::getExplanationForPrefixEq(
+                            nfc, nfnc, index_c_k, index_nc_k, info.d_ant);
                         Node prea = p==stra.size() ? const_str : NodeManager::currentNM()->mkConst( isRev ? stra.suffix( p ) : stra.prefix( p ) );
                         Node sk = d_sk_cache.mkSkolemCached(
                             other_str,
@@ -3225,24 +3263,13 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal
                         info.d_id = INFER_SSPLIT_CST_PROP;
                         info_valid = true;
                       }
-                      /*  FIXME for isRev, speculative
-                      else if( options::stringLenPropCsp() ){
-                        //propagate length constraint
-                        std::vector< Node > cc;
-                        for( unsigned i=index; i<start_index_nc_k; i++ ){
-                          cc.push_back( normal_forms[nconst_k][i] );
-                        }
-                        Node lt = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, mkConcat( cc ) );
-                        conc = NodeManager::currentNM()->mkNode( kind::GEQ, lt, NodeManager::currentNM()->mkConst( Rational(p) ) );        
-                        sendInference( ant, conc, "S-Split(CSP-P)-lprop", true );
-                      }
-                      */
                     } 
                   }
                   if( !info_valid ){
                     info.d_ant.push_back( xnz );
-                    Node const_str = normal_forms[const_k][index];
-                    getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, index, isRev, info.d_ant );
+                    Node const_str = nfcv[index];
+                    NormalForm::getExplanationForPrefixEq(
+                        nfi, nfj, index, index, info.d_ant);
                     CVC4::String stra = const_str.getConst<String>();
                     if( options::stringBinaryCsp() && stra.size()>3 ){
                       //split string in half
@@ -3285,7 +3312,7 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal
                 if( options::stringCheckEntailLen() ){
                   //check entailment
                   for( unsigned e=0; e<2; e++ ){
-                    Node t = e==0 ? normal_forms[i][index] : normal_forms[j][index];
+                    Node t = e == 0 ? nfiv[index] : nfjv[index];
                     //do not infer constants are larger than variables
                     if( t.getKind()!=kind::CONST_STRING ){
                       Node lt1 = e==0 ? length_term_i : length_term_j;
@@ -3302,11 +3329,12 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal
                     }
                   }
                 }
-                
-                getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, index, isRev, info.d_ant );
+
+                NormalForm::getExplanationForPrefixEq(
+                    nfi, nfj, index, index, info.d_ant);
                 //x!=e /\ y!=e
                 for(unsigned xory=0; xory<2; xory++) {
-                  Node x = xory==0 ? normal_forms[i][index] : normal_forms[j][index];
+                  Node x = xory == 0 ? nfiv[index] : nfjv[index];
                   Node xgtz = x.eqNode( d_emptyString ).negate();
                   if( d_equalityEngine.areDisequal( x, d_emptyString, true ) ) {
                     info.d_ant.push_back( xgtz );
@@ -3315,15 +3343,19 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal
                   }
                 }
                 Node sk = d_sk_cache.mkSkolemCached(
-                    normal_forms[i][index],
-                    normal_forms[j][index],
+                    nfiv[index],
+                    nfjv[index],
                     isRev ? SkolemCache::SK_ID_V_SPT_REV
                           : SkolemCache::SK_ID_V_SPT,
                     "v_spt");
                 // must add length requirement
                 info.d_new_skolem[LENGTH_GEQ_ONE].push_back(sk);
-                Node eq1 = normal_forms[i][index].eqNode( isRev ? mkConcat(sk, normal_forms[j][index]) : mkConcat(normal_forms[j][index], sk) );
-                Node eq2 = normal_forms[j][index].eqNode( isRev ? mkConcat(sk, normal_forms[i][index]) : mkConcat(normal_forms[i][index], sk) );
+                Node eq1 =
+                    nfiv[index].eqNode(isRev ? mkConcat(sk, nfjv[index])
+                                             : mkConcat(nfjv[index], sk));
+                Node eq2 =
+                    nfjv[index].eqNode(isRev ? mkConcat(sk, nfiv[index])
+                                             : mkConcat(nfiv[index], sk));
 
                 if( lentTestSuccess!=-1 ){
                   info.d_antn.push_back( lentTestExp );
@@ -3355,15 +3387,26 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal
   }while( success );
 }
 
-bool TheoryStrings::detectLoop( std::vector< std::vector< Node > > &normal_forms, int i, int j, int index, int &loop_in_i, int &loop_in_j, unsigned rproc ){
+bool TheoryStrings::detectLoop(NormalForm& nfi,
+                               NormalForm& nfj,
+                               int index,
+                               int& loop_in_i,
+                               int& loop_in_j,
+                               unsigned rproc)
+{
   int has_loop[2] = { -1, -1 };
   if( options::stringLB() != 2 ) {
     for( unsigned r=0; r<2; r++ ) {
-      int n_index = (r==0 ? i : j);
-      int other_n_index = (r==0 ? j : i);
-      if( normal_forms[other_n_index][index].getKind() != kind::CONST_STRING ) {
-        for( unsigned lp = index+1; lp<normal_forms[n_index].size()-rproc; lp++ ){
-          if( normal_forms[n_index][lp]==normal_forms[other_n_index][index] ){
+      NormalForm& nf = r == 0 ? nfi : nfj;
+      NormalForm& nfo = r == 0 ? nfj : nfi;
+      std::vector<Node>& nfv = nf.d_nf;
+      std::vector<Node>& nfov = nfo.d_nf;
+      if (!nfov[index].isConst())
+      {
+        for (unsigned lp = index + 1; lp < nfv.size() - rproc; lp++)
+        {
+          if (nfv[lp] == nfov[index])
+          {
             has_loop[r] = lp;
             break;
           }
@@ -3382,16 +3425,11 @@ bool TheoryStrings::detectLoop( std::vector< std::vector< Node > > &normal_forms
 }
 
 //xs(zy)=t(yz)xr
-TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(
-    const std::vector<std::vector<Node> >& normal_forms,
-    const std::vector<Node>& normal_form_src,
-    int i,
-    int j,
-    int loop_n_index,
-    int other_n_index,
-    int loop_index,
-    int index,
-    InferInfo& info)
+TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(NormalForm& nfi,
+                                                            NormalForm& nfj,
+                                                            int loop_index,
+                                                            int index,
+                                                            InferInfo& info)
 {
   if (options::stringProcessLoopMode() == ProcessLoopMode::ABORT)
   {
@@ -3405,18 +3443,17 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(
 
   NodeManager* nm = NodeManager::currentNM();
   Node conc;
-  Trace("strings-loop") << "Detected possible loop for "
-                        << normal_forms[loop_n_index][loop_index] << std::endl;
-  Trace("strings-loop") << " ... (X)= " << normal_forms[other_n_index][index]
-                        << std::endl;
+  const std::vector<Node>& veci = nfi.d_nf;
+  const std::vector<Node>& vecoi = nfj.d_nf;
 
+  Trace("strings-loop") << "Detected possible loop for " << veci[loop_index]
+                        << std::endl;
+  Trace("strings-loop") << " ... (X)= " << vecoi[index] << std::endl;
   Trace("strings-loop") << " ... T(Y.Z)= ";
-  const std::vector<Node>& veci = normal_forms[loop_n_index];
   std::vector<Node> vec_t(veci.begin() + index, veci.begin() + loop_index);
   Node t_yz = mkConcat(vec_t);
   Trace("strings-loop") << " (" << t_yz << ")" << std::endl;
   Trace("strings-loop") << " ... S(Z.Y)= ";
-  const std::vector<Node>& vecoi = normal_forms[other_n_index];
   std::vector<Node> vec_s(vecoi.begin() + index + 1, vecoi.end());
   Node s_zy = mkConcat(vec_s);
   Trace("strings-loop") << s_zy << std::endl;
@@ -3453,7 +3490,7 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(
   Node split_eq;
   for (unsigned r = 0; r < 2; r++)
   {
-    Node t = r == 0 ? normal_forms[loop_n_index][loop_index] : t_yz;
+    Node t = r == 0 ? veci[loop_index] : t_yz;
     split_eq = t.eqNode(d_emptyString);
     Node split_eqr = Rewriter::rewrite(split_eq);
     // the equality could rewrite to false
@@ -3486,16 +3523,14 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(
       && s_zy.getConst<String>().isRepeated())
   {
     Node rep_c = nm->mkConst(s_zy.getConst<String>().substr(0, 1));
-    Trace("strings-loop") << "Special case (X)="
-                          << normal_forms[other_n_index][index] << " "
+    Trace("strings-loop") << "Special case (X)=" << vecoi[index] << " "
                           << std::endl;
     Trace("strings-loop") << "... (C)=" << rep_c << " " << std::endl;
     // special case
-    str_in_re =
-        nm->mkNode(kind::STRING_IN_REGEXP,
-                   normal_forms[other_n_index][index],
-                   nm->mkNode(kind::REGEXP_STAR,
-                              nm->mkNode(kind::STRING_TO_REGEXP, rep_c)));
+    str_in_re = nm->mkNode(
+        STRING_IN_REGEXP,
+        vecoi[index],
+        nm->mkNode(REGEXP_STAR, nm->mkNode(STRING_TO_REGEXP, rep_c)));
     conc = str_in_re;
   }
   else if (t_yz.isConst())
@@ -3528,12 +3563,12 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(
         continue;
       }
       Node conc2 = nm->mkNode(
-          kind::STRING_IN_REGEXP,
-          normal_forms[other_n_index][index],
-          nm->mkNode(kind::REGEXP_CONCAT,
-                     nm->mkNode(kind::STRING_TO_REGEXP, y),
-                     nm->mkNode(kind::REGEXP_STAR,
-                                nm->mkNode(kind::STRING_TO_REGEXP, restr))));
+          STRING_IN_REGEXP,
+          vecoi[index],
+          nm->mkNode(
+              REGEXP_CONCAT,
+              nm->mkNode(STRING_TO_REGEXP, y),
+              nm->mkNode(REGEXP_STAR, nm->mkNode(STRING_TO_REGEXP, restr))));
       cc = cc == d_true ? conc2 : nm->mkNode(kind::AND, cc, conc2);
       vconc.push_back(cc);
     }
@@ -3566,8 +3601,7 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(
     vec_r.insert(vec_r.begin(), sk_y);
     vec_r.insert(vec_r.begin(), sk_z);
     Node conc2 = s_zy.eqNode(mkConcat(vec_r));
-    Node conc3 =
-        normal_forms[other_n_index][index].eqNode(mkConcat(sk_y, sk_w));
+    Node conc3 = vecoi[index].eqNode(mkConcat(sk_y, sk_w));
     Node restr = r == d_emptyString ? s_zy : mkConcat(sk_z, sk_y);
     str_in_re =
         nm->mkNode(kind::STRING_IN_REGEXP,
@@ -3587,19 +3621,22 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(
   // we will be done
   info.d_conc = conc;
   info.d_id = INFER_FLOOP;
-  info.d_nf_pair[0] = normal_form_src[i];
-  info.d_nf_pair[1] = normal_form_src[j];
+  info.d_nf_pair[0] = nfi.d_base;
+  info.d_nf_pair[1] = nfj.d_base;
   return ProcessLoopResult::INFERENCE;
 }
 
 //return true for lemma, false if we succeed
 void TheoryStrings::processDeq( Node ni, Node nj ) {
   //Assert( areDisequal( ni, nj ) );
-  if( d_normal_forms[ni].size()>1 || d_normal_forms[nj].size()>1 ){
+  NormalForm& nfni = getNormalForm(ni);
+  NormalForm& nfnj = getNormalForm(nj);
+  if (nfni.d_nf.size() > 1 || nfnj.d_nf.size() > 1)
+  {
     std::vector< Node > nfi;
-    nfi.insert( nfi.end(), d_normal_forms[ni].begin(), d_normal_forms[ni].end() );
+    nfi.insert(nfi.end(), nfni.d_nf.begin(), nfni.d_nf.end());
     std::vector< Node > nfj;
-    nfj.insert( nfj.end(), d_normal_forms[nj].begin(), d_normal_forms[nj].end() );
+    nfj.insert(nfj.end(), nfnj.d_nf.begin(), nfnj.d_nf.end());
 
     int revRet = processReverseDeq( nfi, nfj, ni, nj );
     if( revRet!=0 ){
@@ -3607,9 +3644,9 @@ void TheoryStrings::processDeq( Node ni, Node nj ) {
     }
 
     nfi.clear();
-    nfi.insert( nfi.end(), d_normal_forms[ni].begin(), d_normal_forms[ni].end() );
+    nfi.insert(nfi.end(), nfni.d_nf.begin(), nfni.d_nf.end());
     nfj.clear();
-    nfj.insert( nfj.end(), d_normal_forms[nj].begin(), d_normal_forms[nj].end() );
+    nfj.insert(nfj.end(), nfnj.d_nf.begin(), nfnj.d_nf.end());
 
     unsigned index = 0;
     while( index<nfi.size() || index<nfj.size() ){
@@ -3665,8 +3702,10 @@ void TheoryStrings::processDeq( Node ni, Node nj ) {
                   eq1 = Rewriter::rewrite( eq1 );
                   Node eq2 = nconst_k.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, firstChar, skr ) );
                   std::vector< Node > antec;
-                  antec.insert( antec.end(), d_normal_forms_exp[ni].begin(), d_normal_forms_exp[ni].end() );
-                  antec.insert( antec.end(), d_normal_forms_exp[nj].begin(), d_normal_forms_exp[nj].end() );
+                  antec.insert(
+                      antec.end(), nfni.d_exp.begin(), nfni.d_exp.end());
+                  antec.insert(
+                      antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end());
                   antec.push_back( nconst_k.eqNode( d_emptyString ).negate() );
                   sendInference( antec, NodeManager::currentNM()->mkNode( kind::OR, 
                                           NodeManager::currentNM()->mkNode( kind::AND, eq1, sk.eqNode( firstChar ).negate() ), eq2 ), "D-DISL-CSplit" );
@@ -3679,8 +3718,8 @@ void TheoryStrings::processDeq( Node ni, Node nj ) {
               //must add lemma
               std::vector< Node > antec;
               std::vector< Node > antec_new_lits;
-              antec.insert( antec.end(), d_normal_forms_exp[ni].begin(), d_normal_forms_exp[ni].end() );
-              antec.insert( antec.end(), d_normal_forms_exp[nj].begin(), d_normal_forms_exp[nj].end() );
+              antec.insert(antec.end(), nfni.d_exp.begin(), nfni.d_exp.end());
+              antec.insert(antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end());
               //check disequal
               if( areDisequal( ni, nj ) ){
                 antec.push_back( ni.eqNode( nj ).negate() );
@@ -3767,16 +3806,18 @@ int TheoryStrings::processSimpleDeq( std::vector< Node >& nfi, std::vector< Node
       }
     }
   }
+  NormalForm& nfni = getNormalForm(ni);
+  NormalForm& nfnj = getNormalForm(nj);
   while( index<nfi.size() || index<nfj.size() ) {
     if( index>=nfi.size() || index>=nfj.size() ){
       Trace("strings-solve-debug") << "Disequality normalize empty" << std::endl;
       std::vector< Node > ant;
       //we have a conflict : because the lengths are equal, the remainder needs to be empty, which will lead to a conflict
-      Node lni = getLengthExp( ni, ant, d_normal_forms_base[ni] );
-      Node lnj = getLengthExp( nj, ant, d_normal_forms_base[nj] );
+      Node lni = getLengthExp(ni, ant, nfni.d_base);
+      Node lnj = getLengthExp(nj, ant, nfnj.d_base);
       ant.push_back( lni.eqNode( lnj ) );
-      ant.insert( ant.end(), d_normal_forms_exp[ni].begin(), d_normal_forms_exp[ni].end() );
-      ant.insert( ant.end(), d_normal_forms_exp[nj].begin(), d_normal_forms_exp[nj].end() );
+      ant.insert(ant.end(), nfni.d_exp.begin(), nfni.d_exp.end());
+      ant.insert(ant.end(), nfnj.d_exp.begin(), nfnj.d_exp.end());
       std::vector< Node > cc;
       std::vector< Node >& nfk = index>=nfi.size() ? nfj : nfi;
       for( unsigned index_k=index; index_k<nfk.size(); index_k++ ){
@@ -4376,18 +4417,6 @@ Node TheoryStrings::mkAnd( std::vector< Node >& a ) {
   }
 }
 
-void TheoryStrings::getConcatVec( Node n, std::vector< Node >& c ) {
-  if( n.getKind()==kind::STRING_CONCAT ) {
-    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
-      if( !areEqual( n[i], d_emptyString ) ) {
-        c.push_back( n[i] );
-      }
-    }
-  }else{
-    c.push_back( n );
-  }
-}
-
 void TheoryStrings::checkNormalFormsDeq()
 {
   std::vector< std::vector< Node > > cols;
@@ -4422,20 +4451,28 @@ void TheoryStrings::checkNormalFormsDeq()
     separateByLength( d_strings_eqc, cols, lts );
     for( unsigned i=0; i<cols.size(); i++ ){
       if( cols[i].size()>1 && d_lemma_cache.empty() ){
-        Trace("strings-solve") << "- Verify disequalities are processed for " << cols[i][0] << ", normal form : ";
-        printConcat( d_normal_forms[cols[i][0]], "strings-solve" );
-        Trace("strings-solve")  << "... #eql = " << cols[i].size() << std::endl;
+        if (Trace.isOn("strings-solve"))
+        {
+          Trace("strings-solve") << "- Verify disequalities are processed for "
+                                 << cols[i][0] << ", normal form : ";
+          printConcat(getNormalForm(cols[i][0]).d_nf, "strings-solve");
+          Trace("strings-solve")
+              << "... #eql = " << cols[i].size() << std::endl;
+        }
         //must ensure that normal forms are disequal
         for( unsigned j=0; j<cols[i].size(); j++ ){
           for( unsigned k=(j+1); k<cols[i].size(); k++ ){
             //for strings that are disequal, but have the same length
             if( areDisequal( cols[i][j], cols[i][k] ) ){
               Assert( !d_conflict );
-              Trace("strings-solve") << "- Compare " << cols[i][j] << " ";
-              printConcat( d_normal_forms[cols[i][j]], "strings-solve" );
-              Trace("strings-solve") << " against " << cols[i][k] << " ";
-              printConcat( d_normal_forms[cols[i][k]], "strings-solve" );
-              Trace("strings-solve")  << "..." << std::endl;
+              if (Trace.isOn("strings-solve"))
+              {
+                Trace("strings-solve") << "- Compare " << cols[i][j] << " ";
+                printConcat(getNormalForm(cols[i][j]).d_nf, "strings-solve");
+                Trace("strings-solve") << " against " << cols[i][k] << " ";
+                printConcat(getNormalForm(cols[i][k]).d_nf, "strings-solve");
+                Trace("strings-solve") << "..." << std::endl;
+              }
               processDeq( cols[i][j], cols[i][k] );
               if( hasProcessed() ){
                 return;
@@ -4451,7 +4488,7 @@ void TheoryStrings::checkNormalFormsDeq()
 void TheoryStrings::checkLengthsEqc() {
   if( options::stringLenNorm() ){
     for( unsigned i=0; i<d_strings_eqc.size(); i++ ){
-      //if( d_normal_forms[nodes[i]].size()>1 ) {
+      NormalForm& nfi = getNormalForm(d_strings_eqc[i]);
       Trace("strings-process-debug") << "Process length constraints for " << d_strings_eqc[i] << std::endl;
       //check if there is a length term for this equivalence class
       EqcInfo* ei = getOrMakeEqcInfo( d_strings_eqc[i], false );
@@ -4460,19 +4497,22 @@ void TheoryStrings::checkLengthsEqc() {
         Node llt = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, lt );
         //now, check if length normalization has occurred
         if( ei->d_normalized_length.get().isNull() ) {
-          Node nf = mkConcat( d_normal_forms[d_strings_eqc[i]] );
+          Node nf = mkConcat(nfi.d_nf);
           if( Trace.isOn("strings-process-debug") ){
-            Trace("strings-process-debug") << "  normal form is " << nf << " from base " << d_normal_forms_base[d_strings_eqc[i]] << std::endl;
+            Trace("strings-process-debug")
+                << "  normal form is " << nf << " from base " << nfi.d_base
+                << std::endl;
             Trace("strings-process-debug") << "  normal form exp is: " << std::endl;
-            for( unsigned j=0; j<d_normal_forms_exp[d_strings_eqc[i]].size(); j++ ){
-              Trace("strings-process-debug") << "   " << d_normal_forms_exp[d_strings_eqc[i]][j] << std::endl;
+            for (const Node& exp : nfi.d_exp)
+            {
+              Trace("strings-process-debug") << "   " << exp << std::endl;
             }
           }
           
           //if not, add the lemma
           std::vector< Node > ant;
-          ant.insert( ant.end(), d_normal_forms_exp[d_strings_eqc[i]].begin(), d_normal_forms_exp[d_strings_eqc[i]].end() );
-          ant.push_back( d_normal_forms_base[d_strings_eqc[i]].eqNode( lt ) );
+          ant.insert(ant.end(), nfi.d_exp.begin(), nfi.d_exp.end());
+          ant.push_back(nfi.d_base.eqNode(lt));
           Node lc = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, nf );
           Node lcr = Rewriter::rewrite( lc );
           Trace("strings-process-debug") << "Rewrote length " << lc << " to " << lcr << std::endl;
@@ -4485,7 +4525,7 @@ void TheoryStrings::checkLengthsEqc() {
       }else{
         Trace("strings-process-debug") << "No length term for eqc " << d_strings_eqc[i] << " " << d_eqc_to_len_term[d_strings_eqc[i]] << std::endl;
         if( !options::stringEagerLen() ){
-          Node c = mkConcat( d_normal_forms[d_strings_eqc[i]] );
+          Node c = mkConcat(nfi.d_nf);
           registerTerm( c, 3 );
           /*
           if( !c.isConst() ){
index b3cb323ae51b8f89d0769401aba3fa5e47e0fff1..e9d82a7b26cae611fde08bda1ec144472ae1866e 100644 (file)
@@ -24,6 +24,7 @@
 #include "expr/attribute.h"
 #include "expr/node_trie.h"
 #include "theory/decision_manager.h"
+#include "theory/strings/normal_form.h"
 #include "theory/strings/regexp_elim.h"
 #include "theory/strings/regexp_operation.h"
 #include "theory/strings/regexp_solver.h"
@@ -307,11 +308,10 @@ class TheoryStrings : public Theory {
   /** inferences: maintained to ensure ref count for internally introduced nodes */
   NodeList d_infer;
   NodeList d_infer_exp;
-  /** normal forms */
-  std::map< Node, Node > d_normal_forms_base;
-  std::map< Node, std::vector< Node > > d_normal_forms;
-  std::map< Node, std::vector< Node > > d_normal_forms_exp;
-  std::map< Node, std::map< Node, std::map< bool, int > > > d_normal_forms_exp_depend;
+  /** map from terms to their normal forms */
+  std::map<Node, NormalForm> d_normal_form;
+  /** get normal form */
+  NormalForm& getNormalForm(Node n);
   //map of pairs of terms that have the same normal form
   NodeIntMap d_nf_pairs;
   std::map< Node, std::vector< Node > > d_nf_pairs_data;
@@ -468,8 +468,14 @@ private:
   class InferInfo
   {
    public:
-    unsigned d_i;
-    unsigned d_j;
+    /** for debugging
+     *
+     * The base pair of strings d_i/d_j that led to the inference, and whether
+     * (d_rev) we were processing the normal forms of these strings in reverse
+     * direction.
+     */
+    Node d_i;
+    Node d_j;
     bool d_rev;
     std::vector<Node> d_ant;
     std::vector<Node> d_antn;
@@ -554,10 +560,77 @@ private:
   //--------------------------end for checkCycles
 
   //--------------------------for checkNormalFormsEq
+  /** normalize equivalence class
+   *
+   * This method attempts to build a "normal form" for the equivalence class
+   * of string term n (for more details on normal forms, see normal_form.h
+   * or see Liang et al CAV 2014). In particular, this method checks whether the
+   * current normal form for each term in this equivalence class is identical.
+   * If it is not, then we add an inference via sendInference and abort the
+   * call.
+   */
   void normalizeEquivalenceClass( Node n );
-  void getNormalForms( Node &eqc, std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
-                       std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend );
-  bool detectLoop( std::vector< std::vector< Node > > &normal_forms, int i, int j, int index, int &loop_in_i, int &loop_in_j, unsigned rproc );
+  /**
+   * For each term in the equivalence class of eqc, this adds data regarding its
+   * normal form to normal_forms. The map term_to_nf_index maps terms to the
+   * index in normal_forms where their normal form data is located.
+   */
+  void getNormalForms(Node eqc,
+                      std::vector<NormalForm>& normal_forms,
+                      std::map<Node, unsigned>& term_to_nf_index);
+  /** process normalize equivalence class
+   *
+   * This is called when an equivalence class contains a set of terms that
+   * have normal forms given by the argument normal_forms. It either
+   * verifies that all normal forms in this vector are identical, or otherwise
+   * adds a conflict, lemma, or inference via the sendInference method.
+   *
+   * To prioritize one inference versus another, it builds a set of possible
+   * inferences, at most two for each pair of distinct normal forms,
+   * corresponding to processing the normal form pair in the (forward, reverse)
+   * directions. Once all possible inferences are recorded, it executes the
+   * one with highest priority based on the enumeration type Inference.
+   */
+  void processNEqc(std::vector<NormalForm>& normal_forms);
+  /** process simple normal equality
+   *
+   * This method is called when two equal terms have normal forms nfi and nfj.
+   * It adds (typically at most one) possible inference to the vector pinfer.
+   * This inference is in the form of an InferInfo object, which stores the
+   * necessary information regarding how to process the inference.
+   *
+   * index: The index in the normal form vectors (nfi.d_nf and nfj.d_nf) that
+   *   we are currently checking. This method will increment this index until
+   *   it finds an index where these vectors differ, or until it reaches the
+   *   end of these vectors.
+   * isRev: Whether we are processing the normal forms in reverse direction.
+   *   Notice in this case the normal form vectors have been reversed, hence,
+   *   many operations are identical to the forward case, e.g. index is
+   *   incremented not decremented, while others require special care, e.g.
+   *   constant strings "ABC" in the normal form vectors are not reversed to
+   *   "CBA" and hence all operations should assume a flipped semantics for
+   *   constants when isRev is true,
+   * rproc: the number of string components on the suffix of the normal form of
+   *   nfi and nfj that were already processed. This is used when using
+   *   fowards/backwards traversals of normal forms to ensure that duplicate
+   *   inferences are not processed.
+   * pinfer: the set of possible inferences we add to.
+   */
+  void processSimpleNEq(NormalForm& nfi,
+                        NormalForm& nfj,
+                        unsigned& index,
+                        bool isRev,
+                        unsigned rproc,
+                        std::vector<InferInfo>& pinfer);
+  //--------------------------end for checkNormalFormsEq
+
+  //--------------------------for checkNormalFormsEq with loops
+  bool detectLoop(NormalForm& nfi,
+                  NormalForm& nfj,
+                  int index,
+                  int& loop_in_i,
+                  int& loop_in_j,
+                  unsigned rproc);
 
   /**
    * Result of processLoop() below.
@@ -572,36 +645,17 @@ private:
     SKIPPED,
   };
 
-  ProcessLoopResult processLoop(
-      const std::vector<std::vector<Node> >& normal_forms,
-      const std::vector<Node>& normal_form_src,
-      int i,
-      int j,
-      int loop_n_index,
-      int other_n_index,
-      int loop_index,
-      int index,
-      InferInfo& info);
-
-  void processNEqc( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
-                    std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend );
-  void processReverseNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, 
-                          std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, 
-                          unsigned i, unsigned j, unsigned& index, unsigned rproc, std::vector< InferInfo >& pinfer );
-  void processSimpleNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, 
-                         std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, 
-                         unsigned i, unsigned j, unsigned& index, bool isRev, unsigned rproc, std::vector< InferInfo >& pinfer );
-  //--------------------------end for checkNormalFormsEq
+  ProcessLoopResult processLoop(NormalForm& nfi,
+                                NormalForm& nfj,
+                                int loop_index,
+                                int index,
+                                InferInfo& info);
+  //--------------------------end for checkNormalFormsEq with loops
 
   //--------------------------for checkNormalFormsDeq
   void processDeq( Node n1, Node n2 );
   int processReverseDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj );
   int processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj, unsigned& index, bool isRev );
-  void getExplanationVectorForPrefix( std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
-                                      unsigned i, int index, bool isRev, std::vector< Node >& curr_exp );
-  void getExplanationVectorForPrefixEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
-                                        std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
-                                        unsigned i, unsigned j, int index_i, int index_j, bool isRev, std::vector< Node >& curr_exp );
   //--------------------------end for checkNormalFormsDeq
 
   //--------------------------------for checkMemberships
@@ -787,8 +841,6 @@ private:
  protected:
   /** mkAnd **/
   Node mkAnd(std::vector<Node>& a);
-  /** get concat vector */
-  void getConcatVec(Node n, std::vector<Node>& c);
 
   /** get equivalence classes
    *