From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Wed, 15 Apr 2020 13:23:27 +0000 (-0500)
Subject: Split TermRegistry object from TheoryStrings (#4312)
X-Git-Tag: cvc5-1.0.0~3364
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=42c765eb255e5bfa65682cd812973f0f3c90017c;p=cvc5.git

Split TermRegistry object from TheoryStrings (#4312)

This consolidates functionalities from TheoryStrings and InferenceManager related to registering terms, including sending "preregistration lemmas" for them. The main purpose of this PR is to detangle this module from InferenceManager so that these two modules have exactly one call to OutputChannel::lemma each.

For the purposes of the theory solvers, TermRegistry contains the official SkolemCache of TheoryStrings, and can be seen as subsuming the previous interface for this class.

This PR is needed for further progress on strings proofs, marking as major since this will be a blocker shortly for this project.

A few things were cleaned in this PR. One function changed name InferenceManager::registerTerm --> TermRegistry::getRegisterTermLemma. No major behavior changes.
---

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9ad790398..6a15335c1 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -714,6 +714,8 @@ libcvc4_add_sources(
   theory/strings/theory_strings_type_rules.h
   theory/strings/theory_strings_utils.cpp
   theory/strings/theory_strings_utils.h
+  theory/strings/term_registry.cpp
+  theory/strings/term_registry.h
   theory/strings/type_enumerator.cpp
   theory/strings/type_enumerator.h
   theory/strings/word.cpp
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp
index abcd8189a..e574e4fa9 100644
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -30,10 +30,13 @@ namespace CVC4 {
 namespace theory {
 namespace strings {
 
-CoreSolver::CoreSolver(context::Context* c, context::UserContext* u, SolverState& s, InferenceManager& im, SkolemCache& skc, BaseSolver& bs) :
-d_state(s), d_im(im), d_skCache(skc),
-d_bsolver(bs),
-d_nf_pairs(c)
+CoreSolver::CoreSolver(context::Context* c,
+                       context::UserContext* u,
+                       SolverState& s,
+                       InferenceManager& im,
+                       TermRegistry& tr,
+                       BaseSolver& bs)
+    : d_state(s), d_im(im), d_termReg(tr), d_bsolver(bs), d_nf_pairs(c)
 {
   d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) );
   d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) );
@@ -1252,7 +1255,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
           // If the equality rewrites to a constant, we must use the
           // purify variable for this string to communicate that
           // we have inferred whether it is empty.
-          Node p = d_skCache.mkSkolemCached(nc, SkolemCache::SK_PURIFY, "lsym");
+          SkolemCache* skc = d_termReg.getSkolemCache();
+          Node p = skc->mkSkolemCached(nc, SkolemCache::SK_PURIFY, "lsym");
           Node pEq = p.eqNode(emp);
           // should not be constant
           Assert(!Rewriter::rewrite(pEq).isConst());
@@ -1326,7 +1330,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
           Node prea = p == straLen ? stra
                                    : (isRev ? Word::suffix(stra, p)
                                             : Word::prefix(stra, p));
-          Node sk = d_skCache.mkSkolemCached(
+          SkolemCache* skc = d_termReg.getSkolemCache();
+          Node sk = skc->mkSkolemCached(
               nc,
               prea,
               isRev ? SkolemCache::SK_ID_C_SPT_REV : SkolemCache::SK_ID_C_SPT,
@@ -1352,7 +1357,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
       Node firstChar = straLen == 1 ? stra
                                     : (isRev ? Word::suffix(stra, 1)
                                              : Word::prefix(stra, 1));
-      Node sk = d_skCache.mkSkolemCached(
+      SkolemCache* skc = d_termReg.getSkolemCache();
+      Node sk = skc->mkSkolemCached(
           nc,
           isRev ? SkolemCache::SK_ID_VC_SPT_REV : SkolemCache::SK_ID_VC_SPT,
           "c_spt");
@@ -1425,7 +1431,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
         info.d_antn.push_back(tnz);
       }
     }
-    Node sk = d_skCache.mkSkolemCached(
+    SkolemCache* skc = d_termReg.getSkolemCache();
+    Node sk = skc->mkSkolemCached(
         x,
         y,
         isRev ? SkolemCache::SK_ID_V_SPT_REV : SkolemCache::SK_ID_V_SPT,
@@ -1664,10 +1671,11 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi,
     Trace("strings-loop") << "Strings::Loop: Normal Loop Breaking."
                           << std::endl;
     // right
-    Node sk_w = d_skCache.mkSkolem("w_loop");
-    Node sk_y = d_skCache.mkSkolem("y_loop");
-    d_im.registerTermAtomic(sk_y, LENGTH_GEQ_ONE);
-    Node sk_z = d_skCache.mkSkolem("z_loop");
+    SkolemCache* skc = d_termReg.getSkolemCache();
+    Node sk_w = skc->mkSkolem("w_loop");
+    Node sk_y = skc->mkSkolem("y_loop");
+    d_termReg.registerTermAtomic(sk_y, LENGTH_GEQ_ONE);
+    Node sk_z = skc->mkSkolem("z_loop");
     // t1 * ... * tn = y * z
     Node conc1 = t_yz.eqNode(utils::mkNConcat(sk_y, sk_z));
     // s1 * ... * sk = z * y * r
@@ -1838,10 +1846,11 @@ void CoreSolver::processDeq(Node ni, Node nj)
           //
           // E.g. x ++ x' ++ ... != "abc" ++ y' ++ ... ^ len(x) != "" --->
           //      x = k1 ++ k2 ^ len(k1) = 1 ^ (k1 != "a" v x = "a" ++  k2)
-          Node sk = d_skCache.mkSkolemCached(
-              nck, SkolemCache::SK_ID_DC_SPT, "dc_spt");
-          d_im.registerTermAtomic(sk, LENGTH_ONE);
-          Node skr = d_skCache.mkSkolemCached(
+          SkolemCache* skc = d_termReg.getSkolemCache();
+          Node sk =
+              skc->mkSkolemCached(nck, SkolemCache::SK_ID_DC_SPT, "dc_spt");
+          d_termReg.registerTermAtomic(sk, LENGTH_ONE);
+          Node skr = skc->mkSkolemCached(
               nck, SkolemCache::SK_ID_DC_SPT_REM, "dc_spt_rem");
           Node eq1 = nck.eqNode(nm->mkNode(kind::STRING_CONCAT, sk, skr));
           eq1 = Rewriter::rewrite(eq1);
@@ -1884,13 +1893,14 @@ void CoreSolver::processDeq(Node ni, Node nj)
         antecNewLits.push_back(xLenTerm.eqNode(yLenTerm).negate());
 
         std::vector<Node> conc;
-        Node sk1 = d_skCache.mkSkolemCached(
-            x, y, SkolemCache::SK_ID_DEQ_X, "x_dsplit");
-        Node sk2 = d_skCache.mkSkolemCached(
-            x, y, SkolemCache::SK_ID_DEQ_Y, "y_dsplit");
-        Node sk3 = d_skCache.mkSkolemCached(
-            x, y, SkolemCache::SK_ID_DEQ_Z, "z_dsplit");
-        d_im.registerTermAtomic(sk3, LENGTH_GEQ_ONE);
+        SkolemCache* skc = d_termReg.getSkolemCache();
+        Node sk1 =
+            skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_X, "x_dsplit");
+        Node sk2 =
+            skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Y, "y_dsplit");
+        Node sk3 =
+            skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Z, "z_dsplit");
+        d_termReg.registerTermAtomic(sk3, LENGTH_GEQ_ONE);
         Node sk1Len = utils::mkNLength(sk1);
         conc.push_back(sk1Len.eqNode(xLenTerm));
         Node sk2Len = utils::mkNLength(sk2);
@@ -2294,7 +2304,7 @@ void CoreSolver::doInferInfo(const InferInfo& ii)
   {
     for (const Node& n : sks.second)
     {
-      d_im.registerTermAtomic(n, sks.first);
+      d_termReg.registerTermAtomic(n, sks.first);
     }
   }
 }
diff --git a/src/theory/strings/core_solver.h b/src/theory/strings/core_solver.h
index b7b487ff7..cef5bd3be 100644
--- a/src/theory/strings/core_solver.h
+++ b/src/theory/strings/core_solver.h
@@ -24,8 +24,8 @@
 #include "theory/strings/infer_info.h"
 #include "theory/strings/inference_manager.h"
 #include "theory/strings/normal_form.h"
-#include "theory/strings/skolem_cache.h"
 #include "theory/strings/solver_state.h"
+#include "theory/strings/term_registry.h"
 
 namespace CVC4 {
 namespace theory {
@@ -46,7 +46,7 @@ class CoreSolver
              context::UserContext* u,
              SolverState& s,
              InferenceManager& im,
-             SkolemCache& skc,
+             TermRegistry& tr,
              BaseSolver& bs);
   ~CoreSolver();
 
@@ -370,8 +370,8 @@ class CoreSolver
   SolverState& d_state;
   /** The (custom) output channel of the theory of strings */
   InferenceManager& d_im;
-  /** cache of all skolems */
-  SkolemCache& d_skCache;
+  /** Reference to the term registry of theory of strings */
+  TermRegistry& d_termReg;
   /** reference to the base solver, used for certain queries */
   BaseSolver& d_bsolver;
   /** Commonly used constants */
diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp
index 0f4af2458..1bcd67cb4 100644
--- a/src/theory/strings/extf_solver.cpp
+++ b/src/theory/strings/extf_solver.cpp
@@ -31,7 +31,7 @@ ExtfSolver::ExtfSolver(context::Context* c,
                        context::UserContext* u,
                        SolverState& s,
                        InferenceManager& im,
-                       SkolemCache& skc,
+                       TermRegistry& tr,
                        StringsRewriter& rewriter,
                        BaseSolver& bs,
                        CoreSolver& cs,
@@ -39,13 +39,13 @@ ExtfSolver::ExtfSolver(context::Context* c,
                        SequencesStatistics& statistics)
     : d_state(s),
       d_im(im),
-      d_skCache(skc),
+      d_termReg(tr),
       d_rewriter(rewriter),
       d_bsolver(bs),
       d_csolver(cs),
       d_extt(et),
       d_statistics(statistics),
-      d_preproc(&skc, u, statistics),
+      d_preproc(d_termReg.getSkolemCache(), u, statistics),
       d_hasExtf(c, false),
       d_extfInferCache(c),
       d_reduced(u)
@@ -160,10 +160,9 @@ bool ExtfSolver::doReduction(int effort, Node n)
     Node x = n[0];
     Node s = n[1];
     // positive contains reduces to a equality
-    Node sk1 =
-        d_skCache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1");
-    Node sk2 =
-        d_skCache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2");
+    SkolemCache* skc = d_termReg.getSkolemCache();
+    Node sk1 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1");
+    Node sk2 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2");
     Node eq = Rewriter::rewrite(x.eqNode(utils::mkNConcat(sk1, s, sk2)));
     std::vector<Node> exp_vec;
     exp_vec.push_back(n);
@@ -305,7 +304,7 @@ void ExtfSolver::checkExtfEval(int effort)
           // only use symbolic definitions if option is set
           if (options::stringInferSym())
           {
-            nrs = d_im.getSymbolicDefinition(sn, exps);
+            nrs = d_termReg.getSymbolicDefinition(sn, exps);
           }
           if (!nrs.isNull())
           {
diff --git a/src/theory/strings/extf_solver.h b/src/theory/strings/extf_solver.h
index f0f0d790b..c5dd24f70 100644
--- a/src/theory/strings/extf_solver.h
+++ b/src/theory/strings/extf_solver.h
@@ -87,7 +87,7 @@ class ExtfSolver
              context::UserContext* u,
              SolverState& s,
              InferenceManager& im,
-             SkolemCache& skc,
+             TermRegistry& tr,
              StringsRewriter& rewriter,
              BaseSolver& bs,
              CoreSolver& cs,
@@ -181,8 +181,8 @@ class ExtfSolver
   SolverState& d_state;
   /** The (custom) output channel of the theory of strings */
   InferenceManager& d_im;
-  /** cache of all skolems */
-  SkolemCache& d_skCache;
+  /** Reference to the term registry of theory of strings */
+  TermRegistry& d_termReg;
   /** The theory rewriter for this theory. */
   StringsRewriter& d_rewriter;
   /** reference to the base solver, used for certain queries */
diff --git a/src/theory/strings/inference_manager.cpp b/src/theory/strings/inference_manager.cpp
index d19ce538d..b6bbcb7df 100644
--- a/src/theory/strings/inference_manager.cpp
+++ b/src/theory/strings/inference_manager.cpp
@@ -34,18 +34,15 @@ InferenceManager::InferenceManager(TheoryStrings& p,
                                    context::Context* c,
                                    context::UserContext* u,
                                    SolverState& s,
-                                   SkolemCache& skc,
+                                   TermRegistry& tr,
                                    OutputChannel& out,
                                    SequencesStatistics& statistics)
     : d_parent(p),
       d_state(s),
-      d_skCache(skc),
+      d_termReg(tr),
       d_out(out),
       d_statistics(statistics),
-      d_keep(c),
-      d_proxyVar(u),
-      d_proxyVarToLength(u),
-      d_lengthLemmaTermsCache(u)
+      d_keep(c)
 {
   NodeManager* nm = NodeManager::currentNM();
   d_zero = nm->mkConst(Rational(0));
@@ -233,7 +230,7 @@ void InferenceManager::sendInfer(Node eq_exp, Node eq, Inference infer)
     std::vector<Node> vars;
     std::vector<Node> subs;
     std::vector<Node> unproc;
-    inferSubstitutionProxyVars(eq_exp, vars, subs, unproc);
+    d_termReg.inferSubstitutionProxyVars(eq_exp, vars, subs, unproc);
     if (unproc.empty())
     {
       Node eqs =
@@ -297,235 +294,6 @@ void InferenceManager::sendPhaseRequirement(Node lit, bool pol)
   d_pendingReqPhase[lit] = pol;
 }
 
-Node InferenceManager::getProxyVariableFor(Node n) const
-{
-  NodeNodeMap::const_iterator it = d_proxyVar.find(n);
-  if (it != d_proxyVar.end())
-  {
-    return (*it).second;
-  }
-  return Node::null();
-}
-
-Node InferenceManager::getSymbolicDefinition(Node n,
-                                             std::vector<Node>& exp) const
-{
-  if (n.getNumChildren() == 0)
-  {
-    Node pn = getProxyVariableFor(n);
-    if (pn.isNull())
-    {
-      return Node::null();
-    }
-    Node eq = n.eqNode(pn);
-    eq = Rewriter::rewrite(eq);
-    if (std::find(exp.begin(), exp.end(), eq) == exp.end())
-    {
-      exp.push_back(eq);
-    }
-    return pn;
-  }
-  std::vector<Node> children;
-  if (n.getMetaKind() == metakind::PARAMETERIZED)
-  {
-    children.push_back(n.getOperator());
-  }
-  for (const Node& nc : n)
-  {
-    if (n.getType().isRegExp())
-    {
-      children.push_back(nc);
-    }
-    else
-    {
-      Node ns = getSymbolicDefinition(nc, exp);
-      if (ns.isNull())
-      {
-        return Node::null();
-      }
-      else
-      {
-        children.push_back(ns);
-      }
-    }
-  }
-  return NodeManager::currentNM()->mkNode(n.getKind(), children);
-}
-
-Node InferenceManager::registerTerm(Node n)
-{
-  Assert(n.getType().isStringLike());
-  NodeManager* nm = NodeManager::currentNM();
-  // register length information:
-  //  for variables, split on empty vs positive length
-  //  for concat/const/replace, introduce proxy var and state length relation
-  Node lsum;
-  if (n.getKind() != STRING_CONCAT && !n.isConst())
-  {
-    Node lsumb = nm->mkNode(STRING_LENGTH, n);
-    lsum = Rewriter::rewrite(lsumb);
-    // can register length term if it does not rewrite
-    if (lsum == lsumb)
-    {
-      registerTermAtomic(n, LENGTH_SPLIT);
-      return Node::null();
-    }
-  }
-  Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
-  StringsProxyVarAttribute spva;
-  sk.setAttribute(spva, true);
-  Node eq = Rewriter::rewrite(sk.eqNode(n));
-  d_proxyVar[n] = sk;
-  // If we are introducing a proxy for a constant or concat term, we do not
-  // need to send lemmas about its length, since its length is already
-  // implied.
-  if (n.isConst() || n.getKind() == STRING_CONCAT)
-  {
-    // do not send length lemma for sk.
-    registerTermAtomic(sk, LENGTH_IGNORE);
-  }
-  Node skl = nm->mkNode(STRING_LENGTH, sk);
-  if (n.getKind() == STRING_CONCAT)
-  {
-    std::vector<Node> nodeVec;
-    for (const Node& nc : n)
-    {
-      if (nc.getAttribute(StringsProxyVarAttribute()))
-      {
-        Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
-        nodeVec.push_back(d_proxyVarToLength[nc]);
-      }
-      else
-      {
-        Node lni = nm->mkNode(STRING_LENGTH, nc);
-        nodeVec.push_back(lni);
-      }
-    }
-    lsum = nm->mkNode(PLUS, nodeVec);
-    lsum = Rewriter::rewrite(lsum);
-  }
-  else if (n.isConst())
-  {
-    lsum = nm->mkConst(Rational(Word::getLength(n)));
-  }
-  Assert(!lsum.isNull());
-  d_proxyVarToLength[sk] = lsum;
-  Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
-
-  return nm->mkNode(AND, eq, ceq);
-}
-
-void InferenceManager::registerTermAtomic(Node n, LengthStatus s)
-{
-  if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
-  {
-    return;
-  }
-  d_lengthLemmaTermsCache.insert(n);
-
-  if (s == LENGTH_IGNORE)
-  {
-    // ignore it
-    return;
-  }
-  std::map<Node, bool> reqPhase;
-  Node lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
-  if (!lenLem.isNull())
-  {
-    Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
-                           << std::endl;
-    Trace("strings-assert") << "(assert " << lenLem << ")" << std::endl;
-    ++(d_statistics.d_lemmasRegisterTermAtomic);
-    d_out.lemma(lenLem);
-  }
-  for (const std::pair<const Node, bool>& rp : reqPhase)
-  {
-    d_out.requirePhase(rp.first, rp.second);
-  }
-}
-
-Node InferenceManager::getRegisterTermAtomicLemma(
-    Node n, LengthStatus s, std::map<Node, bool>& reqPhase)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
-
-  if (s == LENGTH_GEQ_ONE)
-  {
-    Node emp = Word::mkEmptyWord(n.getType());
-    Node neq_empty = n.eqNode(emp).negate();
-    Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
-    Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
-    Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
-                           << std::endl;
-    Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
-    return len_geq_one;
-  }
-
-  if (s == LENGTH_ONE)
-  {
-    Node len_one = n_len.eqNode(d_one);
-    Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
-                           << std::endl;
-    Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
-    return len_one;
-  }
-  Assert(s == LENGTH_SPLIT);
-
-  Node emp = Word::mkEmptyWord(n.getType());
-  std::vector<Node> lems;
-  // split whether the string is empty
-  Node n_len_eq_z = n_len.eqNode(d_zero);
-  Node n_len_eq_z_2 = n.eqNode(emp);
-  Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
-  case_empty = Rewriter::rewrite(case_empty);
-  Node case_nempty = nm->mkNode(GT, n_len, d_zero);
-  if (!case_empty.isConst())
-  {
-    Node lem = nm->mkNode(OR, case_empty, case_nempty);
-    lems.push_back(lem);
-    Trace("strings-lemma") << "Strings::Lemma LENGTH >= 0 : " << lem
-                           << std::endl;
-    // prefer trying the empty case first
-    // notice that requirePhase must only be called on rewritten literals that
-    // occur in the CNF stream.
-    n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
-    Assert(!n_len_eq_z.isConst());
-    reqPhase[n_len_eq_z] = true;
-    n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
-    Assert(!n_len_eq_z_2.isConst());
-    reqPhase[n_len_eq_z_2] = true;
-  }
-  else if (!case_empty.getConst<bool>())
-  {
-    // the rewriter knows that n is non-empty
-    Trace("strings-lemma") << "Strings::Lemma LENGTH > 0 (non-empty): "
-                           << case_nempty << std::endl;
-    lems.push_back(case_nempty);
-  }
-  else
-  {
-    // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
-    // n ---> "". Since this method is only called on non-constants n, it must
-    // be that n = "" ^ len( n ) = 0 does not rewrite to true.
-    Assert(false);
-  }
-
-  // additionally add len( x ) >= 0 ?
-  if (options::stringLenGeqZ())
-  {
-    Node n_len_geq = nm->mkNode(kind::GEQ, n_len, d_zero);
-    n_len_geq = Rewriter::rewrite(n_len_geq);
-    lems.push_back(n_len_geq);
-  }
-
-  if (lems.empty())
-  {
-    return Node::null();
-  }
-  return lems.size() == 1 ? lems[0] : nm->mkNode(AND, lems);
-}
-
 void InferenceManager::addToExplanation(Node a,
                                         Node b,
                                         std::vector<Node>& exp) const
@@ -601,88 +369,6 @@ bool InferenceManager::hasProcessed() const
   return d_state.isInConflict() || !d_pendingLem.empty() || !d_pending.empty();
 }
 
-void InferenceManager::inferSubstitutionProxyVars(
-    Node n,
-    std::vector<Node>& vars,
-    std::vector<Node>& subs,
-    std::vector<Node>& unproc) const
-{
-  if (n.getKind() == AND)
-  {
-    for (const Node& nc : n)
-    {
-      inferSubstitutionProxyVars(nc, vars, subs, unproc);
-    }
-    return;
-  }
-  if (n.getKind() == EQUAL)
-  {
-    Node ns = n.substitute(vars.begin(), vars.end(), subs.begin(), subs.end());
-    ns = Rewriter::rewrite(ns);
-    if (ns.getKind() == EQUAL)
-    {
-      Node s;
-      Node v;
-      for (unsigned i = 0; i < 2; i++)
-      {
-        Node ss;
-        // determine whether this side has a proxy variable
-        if (ns[i].getAttribute(StringsProxyVarAttribute()))
-        {
-          // it is a proxy variable
-          ss = ns[i];
-        }
-        else if (ns[i].isConst())
-        {
-          ss = getProxyVariableFor(ns[i]);
-        }
-        if (!ss.isNull())
-        {
-          v = ns[1 - i];
-          // if the other side is a constant or variable
-          if (v.getNumChildren() == 0)
-          {
-            if (s.isNull())
-            {
-              s = ss;
-            }
-            else
-            {
-              // both sides of the equality correspond to a proxy variable
-              if (ss == s)
-              {
-                // it is a trivial equality, e.g. between a proxy variable
-                // and its definition
-                return;
-              }
-              else
-              {
-                // equality between proxy variables, non-trivial
-                s = Node::null();
-              }
-            }
-          }
-        }
-      }
-      if (!s.isNull())
-      {
-        // the equality can be turned into a substitution
-        subs.push_back(s);
-        vars.push_back(v);
-        return;
-      }
-    }
-    else
-    {
-      n = ns;
-    }
-  }
-  if (n != d_true)
-  {
-    unproc.push_back(n);
-  }
-}
-
 Node InferenceManager::mkExplain(const std::vector<Node>& a) const
 {
   std::vector<Node> an;
diff --git a/src/theory/strings/inference_manager.h b/src/theory/strings/inference_manager.h
index 60139ca83..c1550328c 100644
--- a/src/theory/strings/inference_manager.h
+++ b/src/theory/strings/inference_manager.h
@@ -26,8 +26,8 @@
 #include "theory/output_channel.h"
 #include "theory/strings/infer_info.h"
 #include "theory/strings/sequences_stats.h"
-#include "theory/strings/skolem_cache.h"
 #include "theory/strings/solver_state.h"
+#include "theory/strings/term_registry.h"
 #include "theory/uf/equality_engine.h"
 
 namespace CVC4 {
@@ -76,7 +76,7 @@ class InferenceManager
                    context::Context* c,
                    context::UserContext* u,
                    SolverState& s,
-                   SkolemCache& skc,
+                   TermRegistry& tr,
                    OutputChannel& out,
                    SequencesStatistics& statistics);
   ~InferenceManager() {}
@@ -176,60 +176,6 @@ class InferenceManager
    */
   void sendPhaseRequirement(Node lit, bool pol);
 
-  //---------------------------- proxy variables and length elaboration
-  /** Get symbolic definition
-   *
-   * This method returns the "symbolic definition" of n, call it n', and
-   * populates the vector exp with an explanation such that exp => n = n'.
-   *
-   * The symbolic definition of n is the term where (maximal) subterms of n
-   * are replaced by their proxy variables. For example, if we introduced
-   * proxy variable v for x ++ y, then given input x ++ y = w, this method
-   * returns v = w and adds v = x ++ y to exp.
-   */
-  Node getSymbolicDefinition(Node n, std::vector<Node>& exp) const;
-  /** Get proxy variable
-   *
-   * If this method returns the proxy variable for (string) term n if one
-   * exists, otherwise it returns null.
-   */
-  Node getProxyVariableFor(Node n) const;
-  /** register term
-   *
-   * This method is called on non-constant string terms n. It returns a lemma
-   * that should be sent on the output channel of theory of strings upon
-   * registration of this term, or null if no lemma is necessary.
-   *
-   * If n is an atomic term, the method registerTermAtomic is called for n
-   * and s = LENGTH_SPLIT and no lemma is returned.
-   */
-  Node registerTerm(Node n);
-  /** register length
-   *
-   * This method is called on non-constant string terms n that are "atomic"
-   * with respect to length. That is, the rewritten form of len(n) is itself.
-   *
-   * It sends a lemma on the output channel that ensures that the length n
-   * satisfies its assigned status (given by argument s).
-   *
-   * If the status is LENGTH_ONE, we send the lemma len( n ) = 1.
-   *
-   * If the status is LENGTH_GEQ, we send a lemma n != "" ^ len( n ) > 0.
-   *
-   * If the status is LENGTH_SPLIT, we send a send a lemma of the form:
-   *   ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0
-   * This method also ensures that, when applicable, the left branch is taken
-   * first via calls to requirePhase.
-   *
-   * If the status is LENGTH_IGNORE, then no lemma is sent. This status is used
-   * e.g. when the length of n is already implied by other constraints.
-   *
-   * In contrast to the above functions, it makes immediate calls to the output
-   * channel instead of adding them to pending lists.
-   */
-  void registerTermAtomic(Node n, LengthStatus s);
-  //---------------------------- end proxy variables and length elaboration
-
   //----------------------------constructing antecedants
   /**
    * Adds equality a = b to the vector exp if a and b are distinct terms. It
@@ -326,32 +272,15 @@ class InferenceManager
    * equality engine of this class.
    */
   void sendInfer(Node eq_exp, Node eq, Inference infer);
-  /**
-   * Get the lemma required for registering the length information for
-   * atomic term n given length status s. For details, see registerTermAtomic.
-   *
-   * Additionally, this method may map literals to a required polarity in the
-   * argument reqPhase, which should be processed by a call to requiredPhase by
-   * the caller of this method.
-   */
-  Node getRegisterTermAtomicLemma(Node n,
-                                  LengthStatus s,
-                                  std::map<Node, bool>& reqPhase);
 
   /** the parent theory of strings object */
   TheoryStrings& d_parent;
-  /**
-   * This is a reference to the solver state of the theory of strings.
-   */
+  /** Reference to the solver state of the theory of strings. */
   SolverState& d_state;
-  /** cache of all skolems */
-  SkolemCache& d_skCache;
-  /** the output channel
-   *
-   * This is a reference to the output channel of the theory of strings.
-   */
+  /** Reference to the term registry of theory of strings */
+  TermRegistry& d_termReg;
+  /** Reference to the output channel of the theory of strings. */
   OutputChannel& d_out;
-
   /** Reference to the statistics for the theory of strings/sequences. */
   SequencesStatistics& d_statistics;
 
@@ -375,58 +304,6 @@ class InferenceManager
    * SAT-context-dependent.
    */
   NodeSet d_keep;
-  /**
-   * Map string terms to their "proxy variables". Proxy variables are used are
-   * intermediate variables so that length information can be communicated for
-   * constants. For example, to communicate that "ABC" has length 3, we
-   * introduce a proxy variable v_{"ABC"} for "ABC", and assert:
-   *   v_{"ABC"} = "ABC" ^ len( v_{"ABC"} ) = 3
-   * Notice this is required since we cannot directly write len( "ABC" ) = 3,
-   * which rewrites to 3 = 3.
-   * In the above example, we store "ABC" -> v_{"ABC"} in this map.
-   */
-  NodeNodeMap d_proxyVar;
-  /**
-   * Map from proxy variables to their normalized length. In the above example,
-   * we store "ABC" -> 3.
-   */
-  NodeNodeMap d_proxyVarToLength;
-  /** List of terms that we have register length for */
-  NodeSet d_lengthLemmaTermsCache;
-
-  /** infer substitution proxy vars
-   *
-   * This method attempts to (partially) convert the formula n into a
-   * substitution of the form:
-   *   v1 -> s1, ..., vn -> sn
-   * where s1 ... sn are proxy variables and v1 ... vn are either variables
-   * or constants.
-   *
-   * This method ensures that P ^ v1 = s1 ^ ... ^ vn = sn ^ unproc is equivalent
-   * to P ^ n, where P is the conjunction of equalities corresponding to the
-   * definition of all proxy variables introduced by the theory of strings.
-   *
-   * For example, say that v1 was introduced as a proxy variable for "ABC", and
-   * v2 was introduced as a proxy variable for "AA".
-   *
-   * Given the input n := v1 = "ABC" ^ v2 = x ^ x = "AA", this method sets:
-   * vars = { x },
-   * subs = { v2 },
-   * unproc = {}.
-   * In particular, this says that the information content of n is essentially
-   * x = v2. The first and third conjunctions can be dropped from the
-   * explanation since these equalities simply correspond to definitions
-   * of proxy variables.
-   *
-   * This method is used as a performance heuristic. It can infer when the
-   * explanation of a fact depends only trivially on equalities corresponding
-   * to definitions of proxy variables, which can be omitted since they are
-   * assumed to hold globally.
-   */
-  void inferSubstitutionProxyVars(Node n,
-                                  std::vector<Node>& vars,
-                                  std::vector<Node>& subs,
-                                  std::vector<Node>& unproc) const;
 };
 
 }  // namespace strings
diff --git a/src/theory/strings/strings_fmf.cpp b/src/theory/strings/strings_fmf.cpp
index 02af3949e..87ce5e7c6 100644
--- a/src/theory/strings/strings_fmf.cpp
+++ b/src/theory/strings/strings_fmf.cpp
@@ -26,45 +26,26 @@ namespace strings {
 StringsFmf::StringsFmf(context::Context* c,
                        context::UserContext* u,
                        Valuation valuation,
-                       SkolemCache& skc)
+                       TermRegistry& tr)
     : d_sslds(nullptr),
       d_satContext(c),
       d_userContext(u),
       d_valuation(valuation),
-      d_skCache(skc),
-      d_inputVars(u)
+      d_termReg(tr)
 {
 }
 
 StringsFmf::~StringsFmf() {}
 
-void StringsFmf::preRegisterTerm(TNode n)
-{
-  if (!n.getType().isStringLike())
-  {
-    return;
-  }
-  Kind k = n.getKind();
-  // Our decision strategy will minimize the length of this term if it is a
-  // variable but not an internally generated Skolem, or a term that does
-  // not belong to this theory.
-  if (n.isVar() ? !d_skCache.isSkolem(n) : kindToTheoryId(k) != THEORY_STRINGS)
-  {
-    d_inputVars.insert(n);
-    Trace("strings-dstrat-reg") << "input variable: " << n << std::endl;
-  }
-}
-
 void StringsFmf::presolve()
 {
   d_sslds.reset(new StringSumLengthDecisionStrategy(
       d_satContext, d_userContext, d_valuation));
   Trace("strings-dstrat-reg")
       << "presolve: register decision strategy." << std::endl;
+  const NodeSet& ivars = d_termReg.getInputVars();
   std::vector<Node> inputVars;
-  for (NodeSet::const_iterator itr = d_inputVars.begin();
-       itr != d_inputVars.end();
-       ++itr)
+  for (NodeSet::const_iterator itr = ivars.begin(); itr != ivars.end(); ++itr)
   {
     inputVars.push_back(*itr);
   }
diff --git a/src/theory/strings/strings_fmf.h b/src/theory/strings/strings_fmf.h
index 375a100ff..9dfb1f590 100644
--- a/src/theory/strings/strings_fmf.h
+++ b/src/theory/strings/strings_fmf.h
@@ -22,7 +22,7 @@
 #include "context/context.h"
 #include "expr/node.h"
 #include "theory/decision_strategy.h"
-#include "theory/strings/skolem_cache.h"
+#include "theory/strings/term_registry.h"
 #include "theory/valuation.h"
 
 namespace CVC4 {
@@ -42,15 +42,8 @@ class StringsFmf
   StringsFmf(context::Context* c,
              context::UserContext* u,
              Valuation valuation,
-             SkolemCache& skc);
+             TermRegistry& tr);
   ~StringsFmf();
-  /** preRegister term
-   *
-   * This determines if the term n should be added to d_inputVars, the set
-   * of terms of type string whose length we are minimizing with this decision
-   * strategy.
-   */
-  void preRegisterTerm(TNode n);
   /** presolve
    *
    * This initializes a (new copy) of the decision strategy d_sslds.
@@ -105,15 +98,10 @@ class StringsFmf
   context::Context* d_satContext;
   /** The user level assertion context for the theory of strings. */
   context::UserContext* d_userContext;
-  /** The valuation object */
+  /** The valuation object of theory of strings */
   Valuation d_valuation;
-  /** reference to the skolem cache */
-  SkolemCache& d_skCache;
-  /**
-   * The set of terms of type string whose length we are minimizing
-   * with this decision strategy.
-   */
-  NodeSet d_inputVars;
+  /** The term registry of theory of strings */
+  TermRegistry& d_termReg;
 };
 
 }  // namespace strings
diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp
new file mode 100644
index 000000000..4f1e41ebb
--- /dev/null
+++ b/src/theory/strings/term_registry.cpp
@@ -0,0 +1,578 @@
+/*********************                                                        */
+/*! \file term_registry.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds, Tianyi Liang, Morgan Deters
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of term registry for the theory of strings.
+ **/
+
+#include "theory/strings/term_registry.h"
+
+#include "expr/attribute.h"
+#include "options/strings_options.h"
+#include "smt/logic_exception.h"
+#include "theory/rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
+
+using namespace std;
+using namespace CVC4::context;
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+struct StringsProxyVarAttributeId
+{
+};
+typedef expr::Attribute<StringsProxyVarAttributeId, bool>
+    StringsProxyVarAttribute;
+
+TermRegistry::TermRegistry(context::Context* c,
+                           context::UserContext* u,
+                           eq::EqualityEngine& ee,
+                           OutputChannel& out,
+                           SequencesStatistics& statistics)
+    : d_ee(ee),
+      d_out(out),
+      d_statistics(statistics),
+      d_hasStrCode(false),
+      d_functionsTerms(c),
+      d_inputVars(u),
+      d_preregisteredTerms(u),
+      d_registeredTerms(u),
+      d_registeredTypes(u),
+      d_proxyVar(u),
+      d_proxyVarToLength(u),
+      d_lengthLemmaTermsCache(u)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  d_zero = nm->mkConst(Rational(0));
+  d_one = nm->mkConst(Rational(1));
+  d_negOne = NodeManager::currentNM()->mkConst(Rational(-1));
+  d_cardSize = utils::getAlphabetCardinality();
+}
+
+TermRegistry::~TermRegistry() {}
+
+void TermRegistry::preRegisterTerm(TNode n)
+{
+  if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end())
+  {
+    return;
+  }
+  d_preregisteredTerms.insert(n);
+  Trace("strings-preregister")
+      << "TheoryString::preregister : " << n << std::endl;
+  // check for logic exceptions
+  Kind k = n.getKind();
+  if (!options::stringExp())
+  {
+    if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
+        || k == STRING_STRREPL || k == STRING_STRREPLALL || k == STRING_STRCTN
+        || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER
+        || k == STRING_REV)
+    {
+      std::stringstream ss;
+      ss << "Term of kind " << k
+         << " not supported in default mode, try --strings-exp";
+      throw LogicException(ss.str());
+    }
+  }
+  if (k == EQUAL)
+  {
+    d_ee.addTriggerEquality(n);
+    return;
+  }
+  else if (k == STRING_IN_REGEXP)
+  {
+    d_out.requirePhase(n, true);
+    d_ee.addTriggerPredicate(n);
+    d_ee.addTerm(n[0]);
+    d_ee.addTerm(n[1]);
+    return;
+  }
+  else if (k == STRING_TO_CODE)
+  {
+    d_hasStrCode = true;
+  }
+  registerTerm(n, 0);
+  TypeNode tn = n.getType();
+  if (tn.isRegExp() && n.isVar())
+  {
+    std::stringstream ss;
+    ss << "Regular expression variables are not supported.";
+    throw LogicException(ss.str());
+  }
+  if (tn.isString())
+  {
+    // all characters of constants should fall in the alphabet
+    if (n.isConst())
+    {
+      std::vector<unsigned> vec = n.getConst<String>().getVec();
+      for (unsigned u : vec)
+      {
+        if (u >= d_cardSize)
+        {
+          std::stringstream ss;
+          ss << "Characters in string \"" << n
+             << "\" are outside of the given alphabet.";
+          throw LogicException(ss.str());
+        }
+      }
+    }
+    d_ee.addTerm(n);
+  }
+  else if (tn.isBoolean())
+  {
+    // Get triggered for both equal and dis-equal
+    d_ee.addTriggerPredicate(n);
+  }
+  else
+  {
+    // Function applications/predicates
+    d_ee.addTerm(n);
+  }
+  // Set d_functionsTerms stores all function applications that are
+  // relevant to theory combination. Notice that this is a subset of
+  // the applications whose kinds are function kinds in the equality
+  // engine. This means it does not include applications of operators
+  // like re.++, which is not a function kind in the equality engine.
+  // Concatenation terms do not need to be considered here because
+  // their arguments have string type and do not introduce any shared
+  // terms.
+  if (n.hasOperator() && d_ee.isFunctionKind(k) && k != STRING_CONCAT)
+  {
+    d_functionsTerms.push_back(n);
+  }
+  if (options::stringFMF())
+  {
+    if (tn.isStringLike())
+    {
+      // Our decision strategy will minimize the length of this term if it is a
+      // variable but not an internally generated Skolem, or a term that does
+      // not belong to this theory.
+      if (n.isVar() ? !d_skCache.isSkolem(n)
+                    : kindToTheoryId(k) != THEORY_STRINGS)
+      {
+        d_inputVars.insert(n);
+        Trace("strings-preregister") << "input variable: " << n << std::endl;
+      }
+    }
+  }
+}
+
+void TermRegistry::registerTerm(Node n, int effort)
+{
+  TypeNode tn = n.getType();
+  bool do_register = true;
+  if (!tn.isStringLike())
+  {
+    if (options::stringEagerLen())
+    {
+      do_register = effort == 0;
+    }
+    else
+    {
+      do_register = effort > 0 || n.getKind() != STRING_CONCAT;
+    }
+  }
+  if (!do_register)
+  {
+    return;
+  }
+  if (d_registeredTerms.find(n) != d_registeredTerms.end())
+  {
+    return;
+  }
+  d_registeredTerms.insert(n);
+  // ensure the type is registered
+  registerType(tn);
+  NodeManager* nm = NodeManager::currentNM();
+  Debug("strings-register") << "TheoryStrings::registerTerm() " << n
+                            << ", effort = " << effort << std::endl;
+  Node regTermLem;
+  if (tn.isStringLike())
+  {
+    // register length information:
+    //  for variables, split on empty vs positive length
+    //  for concat/const/replace, introduce proxy var and state length relation
+    regTermLem = getRegisterTermLemma(n);
+  }
+  else if (n.getKind() == STRING_TO_CODE)
+  {
+    // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
+    Node code_len = utils::mkNLength(n[0]).eqNode(d_one);
+    Node code_eq_neg1 = n.eqNode(d_negOne);
+    Node code_range = nm->mkNode(
+        AND,
+        nm->mkNode(GEQ, n, d_zero),
+        nm->mkNode(
+            LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
+    regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
+  }
+  else if (n.getKind() == STRING_STRIDOF)
+  {
+    Node len = utils::mkNLength(n[0]);
+    regTermLem = nm->mkNode(AND,
+                            nm->mkNode(GEQ, n, nm->mkConst(Rational(-1))),
+                            nm->mkNode(LEQ, n, len));
+  }
+  if (!regTermLem.isNull())
+  {
+    Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
+                           << std::endl;
+    Trace("strings-assert") << "(assert " << regTermLem << ")" << std::endl;
+    ++(d_statistics.d_lemmasRegisterTerm);
+    d_out.lemma(regTermLem);
+  }
+}
+
+void TermRegistry::registerType(TypeNode tn)
+{
+  if (d_registeredTypes.find(tn) != d_registeredTypes.end())
+  {
+    return;
+  }
+  d_registeredTypes.insert(tn);
+  if (tn.isStringLike())
+  {
+    // preregister the empty word for the type
+    Node emp = Word::mkEmptyWord(tn);
+    if (!d_ee.hasTerm(emp))
+    {
+      preRegisterTerm(emp);
+    }
+  }
+}
+
+Node TermRegistry::getRegisterTermLemma(Node n)
+{
+  Assert(n.getType().isStringLike());
+  NodeManager* nm = NodeManager::currentNM();
+  // register length information:
+  //  for variables, split on empty vs positive length
+  //  for concat/const/replace, introduce proxy var and state length relation
+  Node lsum;
+  if (n.getKind() != STRING_CONCAT && !n.isConst())
+  {
+    Node lsumb = nm->mkNode(STRING_LENGTH, n);
+    lsum = Rewriter::rewrite(lsumb);
+    // can register length term if it does not rewrite
+    if (lsum == lsumb)
+    {
+      registerTermAtomic(n, LENGTH_SPLIT);
+      return Node::null();
+    }
+  }
+  Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
+  StringsProxyVarAttribute spva;
+  sk.setAttribute(spva, true);
+  Node eq = Rewriter::rewrite(sk.eqNode(n));
+  d_proxyVar[n] = sk;
+  // If we are introducing a proxy for a constant or concat term, we do not
+  // need to send lemmas about its length, since its length is already
+  // implied.
+  if (n.isConst() || n.getKind() == STRING_CONCAT)
+  {
+    // do not send length lemma for sk.
+    registerTermAtomic(sk, LENGTH_IGNORE);
+  }
+  Node skl = nm->mkNode(STRING_LENGTH, sk);
+  if (n.getKind() == STRING_CONCAT)
+  {
+    std::vector<Node> nodeVec;
+    for (const Node& nc : n)
+    {
+      if (nc.getAttribute(StringsProxyVarAttribute()))
+      {
+        Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
+        nodeVec.push_back(d_proxyVarToLength[nc]);
+      }
+      else
+      {
+        Node lni = nm->mkNode(STRING_LENGTH, nc);
+        nodeVec.push_back(lni);
+      }
+    }
+    lsum = nm->mkNode(PLUS, nodeVec);
+    lsum = Rewriter::rewrite(lsum);
+  }
+  else if (n.isConst())
+  {
+    lsum = nm->mkConst(Rational(Word::getLength(n)));
+  }
+  Assert(!lsum.isNull());
+  d_proxyVarToLength[sk] = lsum;
+  Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
+
+  return nm->mkNode(AND, eq, ceq);
+}
+
+void TermRegistry::registerTermAtomic(Node n, LengthStatus s)
+{
+  if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
+  {
+    return;
+  }
+  d_lengthLemmaTermsCache.insert(n);
+
+  if (s == LENGTH_IGNORE)
+  {
+    // ignore it
+    return;
+  }
+  std::map<Node, bool> reqPhase;
+  Node lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
+  if (!lenLem.isNull())
+  {
+    Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
+                           << std::endl;
+    Trace("strings-assert") << "(assert " << lenLem << ")" << std::endl;
+    ++(d_statistics.d_lemmasRegisterTermAtomic);
+    d_out.lemma(lenLem);
+  }
+  for (const std::pair<const Node, bool>& rp : reqPhase)
+  {
+    d_out.requirePhase(rp.first, rp.second);
+  }
+}
+
+SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; }
+
+const context::CDList<TNode>& TermRegistry::getFunctionTerms() const
+{
+  return d_functionsTerms;
+}
+
+const context::CDHashSet<Node, NodeHashFunction>& TermRegistry::getInputVars()
+    const
+{
+  return d_inputVars;
+}
+
+bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
+
+Node TermRegistry::getRegisterTermAtomicLemma(Node n,
+                                              LengthStatus s,
+                                              std::map<Node, bool>& reqPhase)
+{
+  Assert(n.getType().isStringLike());
+  NodeManager* nm = NodeManager::currentNM();
+  Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
+  Node emp = Word::mkEmptyWord(n.getType());
+  if (s == LENGTH_GEQ_ONE)
+  {
+    Node neq_empty = n.eqNode(emp).negate();
+    Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
+    Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
+    Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
+                           << std::endl;
+    Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
+    return len_geq_one;
+  }
+
+  if (s == LENGTH_ONE)
+  {
+    Node len_one = n_len.eqNode(d_one);
+    Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
+                           << std::endl;
+    Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
+    return len_one;
+  }
+  Assert(s == LENGTH_SPLIT);
+
+  std::vector<Node> lems;
+  // split whether the string is empty
+  Node n_len_eq_z = n_len.eqNode(d_zero);
+  Node n_len_eq_z_2 = n.eqNode(emp);
+  Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
+  case_empty = Rewriter::rewrite(case_empty);
+  Node case_nempty = nm->mkNode(GT, n_len, d_zero);
+  if (!case_empty.isConst())
+  {
+    Node lem = nm->mkNode(OR, case_empty, case_nempty);
+    lems.push_back(lem);
+    // prefer trying the empty case first
+    // notice that requirePhase must only be called on rewritten literals that
+    // occur in the CNF stream.
+    n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
+    Assert(!n_len_eq_z.isConst());
+    reqPhase[n_len_eq_z] = true;
+    n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
+    Assert(!n_len_eq_z_2.isConst());
+    reqPhase[n_len_eq_z_2] = true;
+  }
+  else if (!case_empty.getConst<bool>())
+  {
+    // the rewriter knows that n is non-empty
+    lems.push_back(case_nempty);
+  }
+  else
+  {
+    // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
+    // n ---> "". Since this method is only called on non-constants n, it must
+    // be that n = "" ^ len( n ) = 0 does not rewrite to true.
+    Assert(false);
+  }
+
+  // additionally add len( x ) >= 0 ?
+  if (options::stringLenGeqZ())
+  {
+    Node n_len_geq = nm->mkNode(kind::GEQ, n_len, d_zero);
+    n_len_geq = Rewriter::rewrite(n_len_geq);
+    lems.push_back(n_len_geq);
+  }
+
+  if (lems.empty())
+  {
+    return Node::null();
+  }
+  return lems.size() == 1 ? lems[0] : nm->mkNode(AND, lems);
+}
+
+Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const
+{
+  if (n.getNumChildren() == 0)
+  {
+    Node pn = getProxyVariableFor(n);
+    if (pn.isNull())
+    {
+      return Node::null();
+    }
+    Node eq = n.eqNode(pn);
+    eq = Rewriter::rewrite(eq);
+    if (std::find(exp.begin(), exp.end(), eq) == exp.end())
+    {
+      exp.push_back(eq);
+    }
+    return pn;
+  }
+  std::vector<Node> children;
+  if (n.getMetaKind() == metakind::PARAMETERIZED)
+  {
+    children.push_back(n.getOperator());
+  }
+  for (const Node& nc : n)
+  {
+    if (n.getType().isRegExp())
+    {
+      children.push_back(nc);
+    }
+    else
+    {
+      Node ns = getSymbolicDefinition(nc, exp);
+      if (ns.isNull())
+      {
+        return Node::null();
+      }
+      else
+      {
+        children.push_back(ns);
+      }
+    }
+  }
+  return NodeManager::currentNM()->mkNode(n.getKind(), children);
+}
+
+Node TermRegistry::getProxyVariableFor(Node n) const
+{
+  NodeNodeMap::const_iterator it = d_proxyVar.find(n);
+  if (it != d_proxyVar.end())
+  {
+    return (*it).second;
+  }
+  return Node::null();
+}
+
+void TermRegistry::inferSubstitutionProxyVars(Node n,
+                                              std::vector<Node>& vars,
+                                              std::vector<Node>& subs,
+                                              std::vector<Node>& unproc) const
+{
+  if (n.getKind() == AND)
+  {
+    for (const Node& nc : n)
+    {
+      inferSubstitutionProxyVars(nc, vars, subs, unproc);
+    }
+    return;
+  }
+  if (n.getKind() == EQUAL)
+  {
+    Node ns = n.substitute(vars.begin(), vars.end(), subs.begin(), subs.end());
+    ns = Rewriter::rewrite(ns);
+    if (ns.getKind() == EQUAL)
+    {
+      Node s;
+      Node v;
+      for (unsigned i = 0; i < 2; i++)
+      {
+        Node ss;
+        // determine whether this side has a proxy variable
+        if (ns[i].getAttribute(StringsProxyVarAttribute()))
+        {
+          // it is a proxy variable
+          ss = ns[i];
+        }
+        else if (ns[i].isConst())
+        {
+          ss = getProxyVariableFor(ns[i]);
+        }
+        if (!ss.isNull())
+        {
+          v = ns[1 - i];
+          // if the other side is a constant or variable
+          if (v.getNumChildren() == 0)
+          {
+            if (s.isNull())
+            {
+              s = ss;
+            }
+            else
+            {
+              // both sides of the equality correspond to a proxy variable
+              if (ss == s)
+              {
+                // it is a trivial equality, e.g. between a proxy variable
+                // and its definition
+                return;
+              }
+              else
+              {
+                // equality between proxy variables, non-trivial
+                s = Node::null();
+              }
+            }
+          }
+        }
+      }
+      if (!s.isNull())
+      {
+        // the equality can be turned into a substitution
+        subs.push_back(s);
+        vars.push_back(v);
+        return;
+      }
+    }
+    else
+    {
+      n = ns;
+    }
+  }
+  if (!n.isConst() || !n.getConst<bool>())
+  {
+    unproc.push_back(n);
+  }
+}
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
diff --git a/src/theory/strings/term_registry.h b/src/theory/strings/term_registry.h
new file mode 100644
index 000000000..b68e44b81
--- /dev/null
+++ b/src/theory/strings/term_registry.h
@@ -0,0 +1,261 @@
+/*********************                                                        */
+/*! \file theory_strings.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds, Tianyi Liang, Tim King
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Term registry for the theory of strings.
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__STRINGS__TERM_REGISTRY_H
+#define CVC4__THEORY__STRINGS__TERM_REGISTRY_H
+
+#include "context/cdhashset.h"
+#include "context/cdlist.h"
+#include "theory/output_channel.h"
+#include "theory/strings/infer_info.h"
+#include "theory/strings/sequences_stats.h"
+#include "theory/strings/skolem_cache.h"
+#include "theory/uf/equality_engine.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+/**
+ * This class manages all the (pre)registration tasks for terms. These tasks
+ * include:
+ * (1) Sending out preregistration lemmas for terms,
+ * (2) Add terms to the equality engine,
+ * (3) Maintaining a list of terms d_functionsTerms (for theory combination),
+ * (4) Maintaining a list of input variables d_inputVars (for fmf).
+ * (5) Maintaining a skolem cache. Notice that this skolem cache is the
+ * official skolem cache that should be used by all modules in TheoryStrings.
+ */
+class TermRegistry
+{
+  typedef context::CDHashSet<Node, NodeHashFunction> NodeSet;
+  typedef context::CDHashSet<TypeNode, TypeNodeHashFunction> TypeNodeSet;
+  typedef context::CDHashMap<Node, Node, NodeHashFunction> NodeNodeMap;
+
+ public:
+  TermRegistry(context::Context* c,
+               context::UserContext* u,
+               eq::EqualityEngine& ee,
+               OutputChannel& out,
+               SequencesStatistics& statistics);
+  ~TermRegistry();
+  /**
+   * Preregister term, called when TheoryStrings::preRegisterTerm(n) is called.
+   * This does the following:
+   * - Checks for illegal terms and throws a LogicException if any term is
+   * not handled.
+   * - Adds the appropriate terms and triggers to the equality engine.
+   * - Updates information about which terms exist, including
+   * d_functionsTerms and d_hasStrCode. If we are using strings finite model
+   * finding (options::stringsFmf), we determine if the term n should be
+   * added to d_inputVars, the set of terms of type string whose length we are
+   * minimizing with its decision strategy.
+   * - Setting phase requirements on n if it is a formula and we prefer
+   * decisions with a particular polarity (e.g. positive regular expression
+   * memberships).
+   */
+  void preRegisterTerm(TNode n);
+  /** Register term
+   *
+   * This performs SAT-context-independent registration for a term n, which
+   * may cause lemmas to be sent on the output channel that involve
+   * "initial refinement lemmas" for n. This includes introducing proxy
+   * variables for string terms and asserting that str.code terms are within
+   * proper bounds.
+   *
+   * Effort is one of the following (TODO make enum #1881):
+   * 0 : upon preregistration or internal assertion
+   * 1 : upon occurrence in length term
+   * 2 : before normal form computation
+   * 3 : called on normal form terms
+   *
+   * Based on the strategy, we may choose to add these initial refinement
+   * lemmas at one of the following efforts, where if it is not the given
+   * effort, the call to this method does nothing.
+   */
+  void registerTerm(Node n, int effort);
+  /** register length
+   *
+   * This method is called on non-constant string terms n that are "atomic"
+   * with respect to length. That is, the rewritten form of len(n) is itself.
+   *
+   * It sends a lemma on the output channel that ensures that the length n
+   * satisfies its assigned status (given by argument s).
+   *
+   * If the status is LENGTH_ONE, we send the lemma len( n ) = 1.
+   *
+   * If the status is LENGTH_GEQ_ONE, we send a lemma n != "" ^ len( n ) > 0.
+   *
+   * If the status is LENGTH_SPLIT, we send a send a lemma of the form:
+   *   ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0
+   * This method also ensures that, when applicable, the left branch is taken
+   * first via calls to requirePhase.
+   *
+   * If the status is LENGTH_IGNORE, then no lemma is sent. This status is used
+   * e.g. when the length of n is already implied by other constraints.
+   *
+   * In contrast to the above functions, it makes immediate calls to the output
+   * channel instead of adding them to pending lists.
+   */
+  void registerTermAtomic(Node n, LengthStatus s);
+  //---------------------------- queries
+  /** Get the skolem cache of this object */
+  SkolemCache* getSkolemCache();
+  /** Get all function terms that have been preregistered to this object */
+  const context::CDList<TNode>& getFunctionTerms() const;
+  /**
+   * Get the "input variables", corresponding to the set of leaf nodes of
+   * string-like type that have been preregistered as terms to this object.
+   */
+  const context::CDHashSet<Node, NodeHashFunction>& getInputVars() const;
+  /** Returns true if any str.code terms have been preregistered */
+  bool hasStringCode() const;
+  //---------------------------- end queries
+  //---------------------------- proxy variables
+  /** Get symbolic definition
+   *
+   * This method returns the "symbolic definition" of n, call it n', and
+   * populates the vector exp with an explanation such that exp => n = n'.
+   *
+   * The symbolic definition of n is the term where (maximal) subterms of n
+   * are replaced by their proxy variables. For example, if we introduced
+   * proxy variable v for x ++ y, then given input x ++ y = w, this method
+   * returns v = w and adds v = x ++ y to exp.
+   */
+  Node getSymbolicDefinition(Node n, std::vector<Node>& exp) const;
+  /** Get proxy variable
+   *
+   * If this method returns the proxy variable for (string) term n if one
+   * exists, otherwise it returns null.
+   */
+  Node getProxyVariableFor(Node n) const;
+
+  /** infer substitution proxy vars
+   *
+   * This method attempts to (partially) convert the formula n into a
+   * substitution of the form:
+   *   v1 -> s1, ..., vn -> sn
+   * where s1 ... sn are proxy variables and v1 ... vn are either variables
+   * or constants.
+   *
+   * This method ensures that P ^ v1 = s1 ^ ... ^ vn = sn ^ unproc is equivalent
+   * to P ^ n, where P is the conjunction of equalities corresponding to the
+   * definition of all proxy variables introduced by the theory of strings.
+   *
+   * For example, say that v1 was introduced as a proxy variable for "ABC", and
+   * v2 was introduced as a proxy variable for "AA".
+   *
+   * Given the input n := v1 = "ABC" ^ v2 = x ^ x = "AA", this method sets:
+   * vars = { x },
+   * subs = { v2 },
+   * unproc = {}.
+   * In particular, this says that the information content of n is essentially
+   * x = v2. The first and third conjunctions can be dropped from the
+   * explanation since these equalities simply correspond to definitions
+   * of proxy variables.
+   *
+   * This method is used as a performance heuristic. It can infer when the
+   * explanation of a fact depends only trivially on equalities corresponding
+   * to definitions of proxy variables, which can be omitted since they are
+   * assumed to hold globally.
+   */
+  void inferSubstitutionProxyVars(Node n,
+                                  std::vector<Node>& vars,
+                                  std::vector<Node>& subs,
+                                  std::vector<Node>& unproc) const;
+  //---------------------------- end proxy variables
+ private:
+  /** Common constants */
+  Node d_zero;
+  Node d_one;
+  Node d_negOne;
+  /** the cardinality of the alphabet */
+  uint32_t d_cardSize;
+  /** Reference to equality engine of the theory of strings. */
+  eq::EqualityEngine& d_ee;
+  /** Reference to the output channel of the theory of strings. */
+  OutputChannel& d_out;
+  /** Reference to the statistics for the theory of strings/sequences. */
+  SequencesStatistics& d_statistics;
+  /** have we asserted any str.code terms? */
+  bool d_hasStrCode;
+  /** The cache of all skolems, which is owned by this class. */
+  SkolemCache d_skCache;
+  /** All function terms that the theory has seen in the current SAT context */
+  context::CDList<TNode> d_functionsTerms;
+  /**
+   * The set of terms of type string that are abstracted as leaf nodes.
+   */
+  NodeSet d_inputVars;
+  /** The user-context dependent cache of terms that have been preregistered */
+  NodeSet d_preregisteredTerms;
+  /** The user-context dependent cache of terms that have been registered */
+  NodeSet d_registeredTerms;
+  /** The types that we have preregistered */
+  TypeNodeSet d_registeredTypes;
+  /**
+   * Map string terms to their "proxy variables". Proxy variables are used are
+   * intermediate variables so that length information can be communicated for
+   * constants. For example, to communicate that "ABC" has length 3, we
+   * introduce a proxy variable v_{"ABC"} for "ABC", and assert:
+   *   v_{"ABC"} = "ABC" ^ len( v_{"ABC"} ) = 3
+   * Notice this is required since we cannot directly write len( "ABC" ) = 3,
+   * which rewrites to 3 = 3.
+   * In the above example, we store "ABC" -> v_{"ABC"} in this map.
+   */
+  NodeNodeMap d_proxyVar;
+  /**
+   * Map from proxy variables to their normalized length. In the above example,
+   * we store "ABC" -> 3.
+   */
+  NodeNodeMap d_proxyVarToLength;
+  /** List of terms that we have register length for */
+  NodeSet d_lengthLemmaTermsCache;
+  /** Register type
+   *
+   * Ensures the theory solver is setup to handle string-like type tn. In
+   * particular, this includes:
+   * - Calling preRegisterTerm on the empty word for tn
+   */
+  void registerType(TypeNode tn);
+  /** register term
+   *
+   * This method is called on non-constant string terms n. It returns a lemma
+   * that should be sent on the output channel of theory of strings upon
+   * registration of this term, or null if no lemma is necessary.
+   *
+   * If n is an atomic term, the method registerTermAtomic is called for n
+   * and s = LENGTH_SPLIT and no lemma is returned.
+   */
+  Node getRegisterTermLemma(Node n);
+  /**
+   * Get the lemma required for registering the length information for
+   * atomic term n given length status s. For details, see registerTermAtomic.
+   *
+   * Additionally, this method may map literals to a required polarity in the
+   * argument reqPhase, which should be processed by a call to requiredPhase by
+   * the caller of this method.
+   */
+  Node getRegisterTermAtomicLemma(Node n,
+                                  LengthStatus s,
+                                  std::map<Node, bool>& reqPhase);
+};
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
+
+#endif /* CVC4__THEORY__STRINGS__TERM_REGISTRY_H */
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index 81e43c595..3e490f285 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -72,18 +72,14 @@ TheoryStrings::TheoryStrings(context::Context* c,
       d_statistics(),
       d_equalityEngine(d_notify, c, "theory::strings::ee", true),
       d_state(c, d_equalityEngine, d_valuation),
-      d_im(*this, c, u, d_state, d_sk_cache, out, d_statistics),
-      d_pregistered_terms_cache(u),
-      d_registered_terms_cache(u),
-      d_registeredTypesCache(u),
-      d_functionsTerms(c),
-      d_has_str_code(false),
+      d_termReg(c, u, d_equalityEngine, out, d_statistics),
+      d_im(*this, c, u, d_state, d_termReg, out, d_statistics),
       d_rewriter(&d_statistics.d_rewrites),
       d_bsolver(c, u, d_state, d_im),
-      d_csolver(c, u, d_state, d_im, d_sk_cache, d_bsolver),
+      d_csolver(c, u, d_state, d_im, d_termReg, d_bsolver),
       d_esolver(nullptr),
       d_rsolver(nullptr),
-      d_stringsFmf(c, u, valuation, d_sk_cache),
+      d_stringsFmf(c, u, valuation, d_termReg),
       d_strategy_init(false)
 {
   setupExtTheory();
@@ -92,7 +88,7 @@ TheoryStrings::TheoryStrings(context::Context* c,
                                  u,
                                  d_state,
                                  d_im,
-                                 d_sk_cache,
+                                 d_termReg,
                                  d_rewriter,
                                  d_bsolver,
                                  d_csolver,
@@ -374,7 +370,7 @@ bool TheoryStrings::collectModelInfoType(
         {
           // does it have a code and the length of these equivalence classes are
           // one?
-          if (d_has_str_code && lts_values[i] == d_one)
+          if (d_termReg.hasStringCode() && lts_values[i] == d_one)
           {
             EqcInfo* eip = d_state.getOrMakeEqcInfo(eqc, false);
             if (eip && !eip->d_codeTerm.get().isNull())
@@ -550,94 +546,11 @@ bool TheoryStrings::collectModelInfoType(
 // MAIN SOLVER
 /////////////////////////////////////////////////////////////////////////////
 
-
-void TheoryStrings::preRegisterTerm(TNode n) {
-  if( d_pregistered_terms_cache.find(n) == d_pregistered_terms_cache.end() ) {
-    d_pregistered_terms_cache.insert(n);
-    Trace("strings-preregister")
-        << "TheoryString::preregister : " << n << std::endl;
-    //check for logic exceptions
-    Kind k = n.getKind();
-    if( !options::stringExp() ){
-      if (k == kind::STRING_STRIDOF || k == kind::STRING_ITOS
-          || k == kind::STRING_STOI || k == kind::STRING_STRREPL
-          || k == kind::STRING_STRREPLALL || k == kind::STRING_STRCTN
-          || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER
-          || k == STRING_REV)
-      {
-        std::stringstream ss;
-        ss << "Term of kind " << k
-           << " not supported in default mode, try --strings-exp";
-        throw LogicException(ss.str());
-      }
-    }
-    switch (k)
-    {
-      case kind::EQUAL: {
-        d_equalityEngine.addTriggerEquality(n);
-        break;
-      }
-      case kind::STRING_IN_REGEXP: {
-        d_out->requirePhase(n, true);
-        d_equalityEngine.addTriggerPredicate(n);
-        d_equalityEngine.addTerm(n[0]);
-        d_equalityEngine.addTerm(n[1]);
-        break;
-      }
-      default: {
-        registerTerm(n, 0);
-        TypeNode tn = n.getType();
-        if (tn.isRegExp() && n.isVar())
-        {
-          std::stringstream ss;
-          ss << "Regular expression variables are not supported.";
-          throw LogicException(ss.str());
-        }
-        if( tn.isString() ) {
-          // all characters of constants should fall in the alphabet
-          if (n.isConst())
-          {
-            std::vector<unsigned> vec = n.getConst<String>().getVec();
-            for (unsigned u : vec)
-            {
-              if (u >= d_cardSize)
-              {
-                std::stringstream ss;
-                ss << "Characters in string \"" << n
-                   << "\" are outside of the given alphabet.";
-                throw LogicException(ss.str());
-              }
-            }
-          }
-          d_equalityEngine.addTerm(n);
-        } else if (tn.isBoolean()) {
-          // Get triggered for both equal and dis-equal
-          d_equalityEngine.addTriggerPredicate(n);
-        } else {
-          // Function applications/predicates
-          d_equalityEngine.addTerm(n);
-        }
-        // Set d_functionsTerms stores all function applications that are
-        // relevant to theory combination. Notice that this is a subset of
-        // the applications whose kinds are function kinds in the equality
-        // engine. This means it does not include applications of operators
-        // like re.++, which is not a function kind in the equality engine.
-        // Concatenation terms do not need to be considered here because
-        // their arguments have string type and do not introduce any shared
-        // terms.
-        if (n.hasOperator() && d_equalityEngine.isFunctionKind(k)
-            && k != kind::STRING_CONCAT)
-        {
-          d_functionsTerms.push_back( n );
-        }
-      }
-    }
-    // register with finite model finding
-    if (options::stringFMF())
-    {
-      d_stringsFmf.preRegisterTerm(n);
-    }
-  }
+void TheoryStrings::preRegisterTerm(TNode n)
+{
+  Trace("strings-preregister")
+      << "TheoryStrings::preRegisterTerm: " << n << std::endl;
+  d_termReg.preRegisterTerm(n);
 }
 
 Node TheoryStrings::expandDefinition(Node node)
@@ -799,7 +712,7 @@ void TheoryStrings::eqNotifyNewClass(TNode t){
   {
     Trace("strings-debug") << "New length eqc : " << t << std::endl;
     //we care about the length of this string
-    registerTerm( t[0], 1 );
+    d_termReg.registerTerm(t[0], 1);
   }
   d_state.eqNotifyNewClass(t);
 }
@@ -887,9 +800,10 @@ void TheoryStrings::computeCareGraph(){
   // since operators are polymorphic, taking strings/sequences.
   std::map<std::pair<TypeNode, Node>, TNodeTrie> index;
   std::map< Node, unsigned > arity;
-  unsigned functionTerms = d_functionsTerms.size();
+  const context::CDList<TNode>& fterms = d_termReg.getFunctionTerms();
+  size_t functionTerms = fterms.size();
   for (unsigned i = 0; i < functionTerms; ++ i) {
-    TNode f1 = d_functionsTerms[i];
+    TNode f1 = fterms[i];
     Trace("strings-cg") << "...build for " << f1 << std::endl;
     Node op = f1.getOperator();
     std::vector< TNode > reps;
@@ -926,7 +840,7 @@ void TheoryStrings::assertPendingFact(Node atom, bool polarity, Node exp) {
       if (!d_equalityEngine.hasTerm(atom[j])
           && atom[j].getType().isStringLike())
       {
-        registerTerm( atom[j], 0 );
+        d_termReg.registerTerm(atom[j], 0);
       }
     }
     Trace("strings-pending-debug") << "  Now assert equality" << std::endl;
@@ -980,7 +894,7 @@ void TheoryStrings::checkRegisterTermsPreNormalForm()
       Node n = (*eqc_i);
       if (!d_bsolver.isCongruent(n))
       {
-        registerTerm(n, 2);
+        d_termReg.registerTerm(n, 2);
       }
       ++eqc_i;
     }
@@ -991,7 +905,7 @@ void TheoryStrings::checkCodes()
 {
   // ensure that lemmas regarding str.code been added for each constant string
   // of length one
-  if (d_has_str_code)
+  if (d_termReg.hasStringCode())
   {
     NodeManager* nm = NodeManager::currentNM();
     // str.code applied to the code term for each equivalence class that has a
@@ -1012,7 +926,7 @@ void TheoryStrings::checkCodes()
         Node cc = nm->mkNode(kind::STRING_TO_CODE, c);
         cc = Rewriter::rewrite(cc);
         Assert(cc.isConst());
-        Node cp = d_im.getProxyVariableFor(c);
+        Node cp = d_termReg.getProxyVariableFor(c);
         AlwaysAssert(!cp.isNull());
         Node vc = nm->mkNode(STRING_TO_CODE, cp);
         if (!d_state.areEqual(cc, vc))
@@ -1062,91 +976,6 @@ void TheoryStrings::checkCodes()
   }
 }
 
-void TheoryStrings::registerTerm(Node n, int effort)
-{
-  TypeNode tn = n.getType();
-  bool do_register = true;
-  if (!tn.isStringLike())
-  {
-    if (options::stringEagerLen())
-    {
-      do_register = effort == 0;
-    }
-    else
-    {
-      do_register = effort > 0 || n.getKind() != STRING_CONCAT;
-    }
-  }
-  if (!do_register)
-  {
-    return;
-  }
-  if (d_registered_terms_cache.find(n) != d_registered_terms_cache.end())
-  {
-    return;
-  }
-  d_registered_terms_cache.insert(n);
-  // ensure the type is registered
-  registerType(tn);
-  NodeManager* nm = NodeManager::currentNM();
-  Debug("strings-register") << "TheoryStrings::registerTerm() " << n
-                            << ", effort = " << effort << std::endl;
-  Node regTermLem;
-  if (tn.isStringLike())
-  {
-    // register length information:
-    //  for variables, split on empty vs positive length
-    //  for concat/const/replace, introduce proxy var and state length relation
-    regTermLem = d_im.registerTerm(n);
-  }
-  else if (n.getKind() == STRING_TO_CODE)
-  {
-    d_has_str_code = true;
-    // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
-    Node code_len = utils::mkNLength(n[0]).eqNode(d_one);
-    Node code_eq_neg1 = n.eqNode(d_neg_one);
-    Node code_range = nm->mkNode(
-        AND,
-        nm->mkNode(GEQ, n, d_zero),
-        nm->mkNode(
-            LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
-    regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
-  }
-  else if (n.getKind() == STRING_STRIDOF)
-  {
-    Node len = utils::mkNLength(n[0]);
-    regTermLem = nm->mkNode(AND,
-                            nm->mkNode(GEQ, n, nm->mkConst(Rational(-1))),
-                            nm->mkNode(LEQ, n, len));
-  }
-  if (!regTermLem.isNull())
-  {
-    Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
-                           << std::endl;
-    Trace("strings-assert") << "(assert " << regTermLem << ")" << std::endl;
-    ++(d_statistics.d_lemmasRegisterTerm);
-    d_out->lemma(regTermLem);
-  }
-}
-
-void TheoryStrings::registerType(TypeNode tn)
-{
-  if (d_registeredTypesCache.find(tn) != d_registeredTypesCache.end())
-  {
-    return;
-  }
-  d_registeredTypesCache.insert(tn);
-  if (tn.isStringLike())
-  {
-    // preregister the empty word for the type
-    Node emp = Word::mkEmptyWord(tn);
-    if (!d_equalityEngine.hasTerm(emp))
-    {
-      preRegisterTerm(emp);
-    }
-  }
-}
-
 void TheoryStrings::checkRegisterTermsNormalForms()
 {
   const std::vector<Node>& seqc = d_bsolver.getStringEqc();
@@ -1159,7 +988,7 @@ void TheoryStrings::checkRegisterTermsNormalForms()
     if (lt.isNull())
     {
       Node c = utils::mkNConcat(nfi.d_nf, eqc.getType());
-      registerTerm(c, 3);
+      d_termReg.registerTerm(c, 3);
     }
   }
 }
diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h
index 453e4eca9..4a61730f4 100644
--- a/src/theory/strings/theory_strings.h
+++ b/src/theory/strings/theory_strings.h
@@ -24,7 +24,6 @@
 
 #include "context/cdhashset.h"
 #include "context/cdlist.h"
-#include "expr/attribute.h"
 #include "expr/node_trie.h"
 #include "theory/strings/base_solver.h"
 #include "theory/strings/core_solver.h"
@@ -40,6 +39,7 @@
 #include "theory/strings/solver_state.h"
 #include "theory/strings/strings_fmf.h"
 #include "theory/strings/strings_rewriter.h"
+#include "theory/strings/term_registry.h"
 #include "theory/theory.h"
 #include "theory/uf/equality_engine.h"
 
@@ -93,9 +93,6 @@ enum InferStep
 };
 std::ostream& operator<<(std::ostream& out, Inference i);
 
-struct StringsProxyVarAttributeId {};
-typedef expr::Attribute< StringsProxyVarAttributeId, bool > StringsProxyVarAttribute;
-
 class TheoryStrings : public Theory {
   friend class InferenceManager;
   typedef context::CDList<Node> NodeList;
@@ -238,13 +235,10 @@ class TheoryStrings : public Theory {
   eq::EqualityEngine d_equalityEngine;
   /** The solver state object */
   SolverState d_state;
+  /** The term registry for this theory */
+  TermRegistry d_termReg;
   /** The (custom) output channel of the theory of strings */
   InferenceManager d_im;
-  // preReg cache
-  NodeSet d_pregistered_terms_cache;
-  NodeSet d_registered_terms_cache;
-  /** The types that we have preregistered */
-  TypeNodeSet d_registeredTypesCache;
   std::vector< Node > d_empty_vec;
 private:
 
@@ -265,24 +259,6 @@ private:
   void addSharedTerm(TNode n) override;
   EqualityStatus getEqualityStatus(TNode a, TNode b) override;
 
- private:
-  /** All the function terms that the theory has seen */
-  context::CDList<TNode> d_functionsTerms;
-private:
-  /** have we asserted any str.code terms? */
-  bool d_has_str_code;
-  // static information about extf
-  class ExtfInfo {
-  public:
-    //all variables in this term
-    std::vector< Node > d_vars;
-  };
-
- private:
-
-  /** cache of all skolems */
-  SkolemCache d_sk_cache;
-
  private:
   void addCarePairs(TNodeTrie* t1,
                     TNodeTrie* t2,
@@ -334,38 +310,10 @@ private:
    */
   void assertPendingFact(Node atom, bool polarity, Node exp);
 
-  /** Register term
-   *
-   * This performs SAT-context-independent registration for a term n, which
-   * may cause lemmas to be sent on the output channel that involve
-   * "initial refinement lemmas" for n. This includes introducing proxy
-   * variables for string terms and asserting that str.code terms are within
-   * proper bounds.
-   *
-   * Effort is one of the following (TODO make enum #1881):
-   * 0 : upon preregistration or internal assertion
-   * 1 : upon occurrence in length term
-   * 2 : before normal form computation
-   * 3 : called on normal form terms
-   *
-   * Based on the strategy, we may choose to add these initial refinement
-   * lemmas at one of the following efforts, where if it is not the given
-   * effort, the call to this method does nothing.
-   */
-  void registerTerm(Node n, int effort);
-  /** Register type
-   *
-   * Ensures the theory solver is setup to handle string-like type tn. In
-   * particular, this includes:
-   * - Calling preRegisterTerm on the empty word for tn
-   */
-  void registerType(TypeNode tn);
-
   // Symbolic Regular Expression
  private:
   /** The theory rewriter for this theory. */
   StringsRewriter d_rewriter;
-
   /**
    * The base solver, responsible for reasoning about congruent terms and
    * inferring constants for equivalence classes.