From db43ae511c2103f1e9718a8954e26cf7866d14a8 Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Mon, 7 May 2018 10:22:27 -0500
Subject: [PATCH] Add support for str.code (#1821)

---
 src/parser/smt2/smt2.cpp                      |   1 +
 src/printer/smt2/smt2_printer.cpp             |   1 +
 src/theory/strings/kinds                      |   2 +
 src/theory/strings/theory_strings.cpp         | 374 +++++++++++++-----
 src/theory/strings/theory_strings.h           |  38 +-
 .../strings/theory_strings_rewriter.cpp       |  28 ++
 src/theory/strings/theory_strings_rewriter.h  |   6 +
 .../strings/theory_strings_type_rules.h       |   4 +-
 src/util/regexp.cpp                           |  26 ++
 src/util/regexp.h                             |  48 ++-
 test/regress/Makefile.tests                   |   5 +
 .../regress1/strings/code-sequence.smt2       |  14 +
 .../regress1/strings/str-code-sat.smt2        |  24 ++
 .../regress1/strings/str-code-unsat-2.smt2    |   6 +
 .../regress1/strings/str-code-unsat-3.smt2    |  21 +
 .../regress1/strings/str-code-unsat.smt2      |  21 +
 16 files changed, 507 insertions(+), 112 deletions(-)
 create mode 100644 test/regress/regress1/strings/code-sequence.smt2
 create mode 100644 test/regress/regress1/strings/str-code-sat.smt2
 create mode 100644 test/regress/regress1/strings/str-code-unsat-2.smt2
 create mode 100644 test/regress/regress1/strings/str-code-unsat-3.smt2
 create mode 100644 test/regress/regress1/strings/str-code-unsat.smt2

diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp
index 1d66ab0c1..09dccdfbd 100644
--- a/src/parser/smt2/smt2.cpp
+++ b/src/parser/smt2/smt2.cpp
@@ -153,6 +153,7 @@ void Smt2::addStringOperators() {
   addOperator(kind::REGEXP_OPT, "re.opt");
   addOperator(kind::REGEXP_RANGE, "re.range");
   addOperator(kind::REGEXP_LOOP, "re.loop");
+  addOperator(kind::STRING_CODE, "str.code");
 }
 
 void Smt2::addFloatingPointOperators() {
diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp
index 96bee9724..2c6a26335 100644
--- a/src/printer/smt2/smt2_printer.cpp
+++ b/src/printer/smt2/smt2_printer.cpp
@@ -523,6 +523,7 @@ void Smt2Printer::toStream(std::ostream& out,
   case kind::STRING_SUFFIX:
   case kind::STRING_ITOS:
   case kind::STRING_STOI:
+  case kind::STRING_CODE:
   case kind::STRING_TO_REGEXP:
   case kind::REGEXP_CONCAT:
   case kind::REGEXP_UNION:
diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds
index 15dd5b423..34237f69e 100644
--- a/src/theory/strings/kinds
+++ b/src/theory/strings/kinds
@@ -24,6 +24,7 @@ operator STRING_PREFIX 2 "string prefixof"
 operator STRING_SUFFIX 2 "string suffixof"
 operator STRING_ITOS 1 "integer to string"
 operator STRING_STOI 1 "string to integer (total function)"
+operator STRING_CODE 1 "string to code, returns the code of the first character of the string if it has length one, -1 otherwise"
 
 #sort CHAR_TYPE \
 #    Cardinality::INTEGERS \
@@ -103,6 +104,7 @@ typerule STRING_PREFIX ::CVC4::theory::strings::StringPrefixOfTypeRule
 typerule STRING_SUFFIX ::CVC4::theory::strings::StringSuffixOfTypeRule
 typerule STRING_ITOS ::CVC4::theory::strings::StringIntToStrTypeRule
 typerule STRING_STOI ::CVC4::theory::strings::StringStrToIntTypeRule
+typerule STRING_CODE ::CVC4::theory::strings::StringStrToIntTypeRule
 
 typerule STRING_IN_REGEXP ::CVC4::theory::strings::StringInRegExpTypeRule
 
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index c780caca2..b77a616b3 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -32,6 +32,7 @@
 
 using namespace std;
 using namespace CVC4::context;
+using namespace CVC4::kind;
 
 namespace CVC4 {
 namespace theory {
@@ -73,13 +74,14 @@ Node TheoryStrings::TermIndex::add( TNode n, unsigned index, TheoryStrings* t, N
   }
 }
 
-
-TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u,
-                             OutputChannel& out, Valuation valuation,
+TheoryStrings::TheoryStrings(context::Context* c,
+                             context::UserContext* u,
+                             OutputChannel& out,
+                             Valuation valuation,
                              const LogicInfo& logicInfo)
     : Theory(THEORY_STRINGS, c, u, out, valuation, logicInfo),
       RMAXINT(LONG_MAX),
-      d_notify( *this ),
+      d_notify(*this),
       d_equalityEngine(d_notify, c, "theory::strings", true),
       d_conflict(c, false),
       d_infer(c),
@@ -98,7 +100,8 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u,
       d_proxy_var(u),
       d_proxy_var_to_length(u),
       d_functionsTerms(c),
-      d_has_extf(c, false ),
+      d_has_extf(c, false),
+      d_has_str_code(false),
       d_regexp_memberships(c),
       d_regexp_ucached(u),
       d_regexp_ccached(c),
@@ -121,11 +124,13 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u,
   getExtTheory()->addFunctionKind(kind::STRING_STRREPL);
   getExtTheory()->addFunctionKind(kind::STRING_STRCTN);
   getExtTheory()->addFunctionKind(kind::STRING_IN_REGEXP);
+  getExtTheory()->addFunctionKind(kind::STRING_CODE);
 
   // The kinds we are treating as function application in congruence
-  d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP);
   d_equalityEngine.addFunctionKind(kind::STRING_LENGTH);
   d_equalityEngine.addFunctionKind(kind::STRING_CONCAT);
+  d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP);
+  d_equalityEngine.addFunctionKind(kind::STRING_CODE);
   if( options::stringLazyPreproc() ){
     d_equalityEngine.addFunctionKind(kind::STRING_STRCTN);
     d_equalityEngine.addFunctionKind(kind::STRING_SUBSTR);
@@ -410,7 +415,9 @@ int TheoryStrings::getReduction( int effort, Node n, Node& nr ) {
       if( d_preproc_cache.find( c_n )==d_preproc_cache.end() ){
         d_preproc_cache[ c_n ] = true;
         Trace("strings-process-debug") << "Process reduction for " << n << ", pol = " << pol << std::endl;
-        if( n.getKind()==kind::STRING_STRCTN && pol==1 ){
+        Kind k = n.getKind();
+        if (k == kind::STRING_STRCTN && pol == 1)
+        {
           Node x = n[0];
           Node s = n[1];
           //positive contains reduces to a equality
@@ -423,9 +430,13 @@ int TheoryStrings::getReduction( int effort, Node n, Node& nr ) {
           //we've reduced this n
           Trace("strings-extf-debug") << "  resolve extf : " << n << " based on positive contain reduction." << std::endl;
           return 1;
-        }else{
-          // for STRING_SUBSTR, STRING_STRCTN with pol=-1,
-          //     STRING_STRIDOF, STRING_ITOS, STRING_STOI, STRING_STRREPL
+        }
+        else if (k != kind::STRING_CODE)
+        {
+          Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF
+                 || k == STRING_ITOS
+                 || k == STRING_STOI
+                 || k == STRING_STRREPL);
           std::vector< Node > new_nodes;
           Node res = d_preproc.simplify( n, new_nodes );
           Assert( res!=n );
@@ -478,6 +489,7 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
     return false;
   }
 
+  NodeManager* nm = NodeManager::currentNM();
   // Generate model
   std::vector< Node > nodes;
   getEquivalenceClasses( nodes );
@@ -521,20 +533,45 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
   ////step 2 : assign arbitrary values for unknown lengths?
   // confirmed by calculus invariant, see paper
   Trace("strings-model") << "Assign to equivalence classes..." << std::endl;
+  std::map<Node, Node> pure_eq_assign;
   //step 3 : assign values to equivalence classes that are pure variables
   for( unsigned i=0; i<col.size(); i++ ){
     std::vector< Node > pure_eq;
     Trace("strings-model") << "The equivalence classes ";
-    for( unsigned j=0; j<col[i].size(); j++ ) {
-      Trace("strings-model") << col[i][j] << " ";
+    for (const Node& eqc : col[i])
+    {
+      Trace("strings-model") << eqc << " ";
       //check if col[i][j] has only variables
-      if( !col[i][j].isConst() ){
-        Assert( d_normal_forms.find( col[i][j] )!=d_normal_forms.end() );
-        if( d_normal_forms[col[i][j]].size()==1 ){//&& d_normal_forms[col[i][j]][0]==col[i][j] ){
-          pure_eq.push_back( col[i][j] );
+      if (!eqc.isConst())
+      {
+        Assert(d_normal_forms.find(eqc) != d_normal_forms.end());
+        if (d_normal_forms[eqc].size() == 1)
+        {
+          // does it have a code?
+          if (d_has_str_code)
+          {
+            EqcInfo* eip = getOrMakeEqcInfo(eqc, false);
+            if (eip && !eip->d_code_term.get().isNull())
+            {
+              // its value must be equal to its code
+              Node ct = nm->mkNode(kind::STRING_CODE, eip->d_code_term.get());
+              Node ctv = d_valuation.getModelValue(ct);
+              unsigned cvalue =
+                  ctv.getConst<Rational>().getNumerator().toUnsignedInt();
+              Trace("strings-model") << "(code: " << cvalue << ") ";
+              std::vector<unsigned> vec;
+              vec.push_back(String::convertCodeToUnsignedInt(cvalue));
+              Node mv = nm->mkConst(String(vec));
+              pure_eq_assign[eqc] = mv;
+              m->getEqualityEngine()->addTerm( mv );
+            }
+          }
+          pure_eq.push_back(eqc);
         }
-      }else{
-        processed[col[i][j]] = col[i][j];
+      }
+      else
+      {
+        processed[eqc] = eqc;
       }
     }
     Trace("strings-model") << "have length " << lts_values[i] << std::endl;
@@ -547,7 +584,7 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
           lvalue++;
         }
         Trace("strings-model") << "*** Decide to make length of " << lvalue << std::endl;
-        lts_values[i] = NodeManager::currentNM()->mkConst( Rational( lvalue ) );
+        lts_values[i] = nm->mkConst(Rational(lvalue));
         values_used[ lvalue ] = true;
       }
       Trace("strings-model") << "Need to assign values of length " << lts_values[i] << " to equivalence classes ";
@@ -556,22 +593,33 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
       }
       Trace("strings-model") << std::endl;
 
-
       //use type enumerator
       Assert(lts_values[i].getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in string model");
       StringEnumeratorLength sel(lts_values[i].getConst<Rational>().getNumerator().toUnsignedInt());
-      for( unsigned j=0; j<pure_eq.size(); j++ ){
-        Assert( !sel.isFinished() );
-        Node c = *sel;
-        while( d_equalityEngine.hasTerm( c ) ){
-          ++sel;
+      for (const Node& eqc : pure_eq)
+      {
+        Node c;
+        std::map<Node, Node>::iterator itp = pure_eq_assign.find(eqc);
+        if (itp == pure_eq_assign.end())
+        {
           Assert( !sel.isFinished() );
           c = *sel;
+          while (m->hasTerm(c))
+          {
+            ++sel;
+            Assert(!sel.isFinished());
+            c = *sel;
+          }
+          ++sel;
+        }
+        else
+        {
+          c = itp->second;
         }
-        ++sel;
-        Trace("strings-model") << "*** Assigned constant " << c << " for " << pure_eq[j] << std::endl;
-        processed[pure_eq[j]] = c;
-        if (!m->assertEquality(pure_eq[j], c, true))
+        Trace("strings-model") << "*** Assigned constant " << c << " for "
+                               << eqc << std::endl;
+        processed[eqc] = c;
+        if (!m->assertEquality(eqc, c, true))
         {
           return false;
         }
@@ -645,9 +693,9 @@ void TheoryStrings::preRegisterTerm(TNode n) {
         break;
       }
       default: {
+        registerTerm(n, 0);
         TypeNode tn = n.getType();
         if( tn.isString() ) {
-          registerTerm( n, 0 );
           // if finite model finding is enabled,
           // then we minimize the length of this term if it is a variable
           // but not an internally generated Skolem, or a term that does
@@ -840,8 +888,12 @@ void TheoryStrings::checkExtfReductions( int effort ) {
   }
 }
 
-TheoryStrings::EqcInfo::EqcInfo(  context::Context* c ) : d_length_term(c), d_cardinality_lem_k(c), d_normalized_length(c) {
-
+TheoryStrings::EqcInfo::EqcInfo(context::Context* c)
+    : d_length_term(c),
+      d_code_term(c),
+      d_cardinality_lem_k(c),
+      d_normalized_length(c)
+{
 }
 
 TheoryStrings::EqcInfo * TheoryStrings::getOrMakeEqcInfo( Node eqc, bool doMake ) {
@@ -874,11 +926,20 @@ void TheoryStrings::conflict(TNode a, TNode b){
 
 /** called when a new equivalance class is created */
 void TheoryStrings::eqNotifyNewClass(TNode t){
-  if( t.getKind() == kind::STRING_LENGTH ){
+  Kind k = t.getKind();
+  if (k == kind::STRING_LENGTH || k == kind::STRING_CODE)
+  {
     Trace("strings-debug") << "New length eqc : " << t << std::endl;
     Node r = d_equalityEngine.getRepresentative(t[0]);
     EqcInfo * ei = getOrMakeEqcInfo( r, true );
-    ei->d_length_term = t[0];
+    if (k == kind::STRING_LENGTH)
+    {
+      ei->d_length_term = t[0];
+    }
+    else
+    {
+      ei->d_code_term = t[0];
+    }
     //we care about the length of this string
     registerTerm( t[0], 1 );
   }else{
@@ -895,6 +956,10 @@ void TheoryStrings::eqNotifyPreMerge(TNode t1, TNode t2){
     if( !e2->d_length_term.get().isNull() ){
       e1->d_length_term.set( e2->d_length_term );
     }
+    if (!e2->d_code_term.get().isNull())
+    {
+      e1->d_code_term.set(e2->d_code_term);
+    }
     if( e2->d_cardinality_lem_k.get()>e1->d_cardinality_lem_k.get() ) {
       e1->d_cardinality_lem_k.set( e2->d_cardinality_lem_k );
     }
@@ -2013,64 +2078,164 @@ void TheoryStrings::checkNormalForms(){
       }
     }
   }
-  if( !hasProcessed() ){
-    Trace("strings-process") << "Normalize equivalence classes...." << std::endl;
-    //calculate normal forms for each equivalence class, possibly adding splitting lemmas
-    d_normal_forms.clear();
-    d_normal_forms_exp.clear();
-    std::map< Node, Node > nf_to_eqc;
-    std::map< Node, Node > eqc_to_nf;
-    std::map< Node, Node > eqc_to_exp;
-    for( unsigned i=0; i<d_strings_eqc.size(); i++ ) {
-      Node eqc = d_strings_eqc[i];
-      Trace("strings-process-debug") << "- Verify normal forms are the same for " << eqc << std::endl;
-      normalizeEquivalenceClass( eqc );
-      Trace("strings-debug") << "Finished normalizing eqc..." << std::endl;
+  if (hasProcessed())
+  {
+    return;
+  }
+  Trace("strings-process") << "Normalize equivalence classes...." << std::endl;
+  // calculate normal forms for each equivalence class, possibly adding
+  // splitting lemmas
+  d_normal_forms.clear();
+  d_normal_forms_exp.clear();
+  std::map<Node, Node> nf_to_eqc;
+  std::map<Node, Node> eqc_to_nf;
+  std::map<Node, Node> eqc_to_exp;
+  for (const Node& eqc : d_strings_eqc)
+  {
+    Trace("strings-process-debug") << "- Verify normal forms are the same for "
+                                   << eqc << std::endl;
+    normalizeEquivalenceClass(eqc);
+    Trace("strings-debug") << "Finished normalizing eqc..." << std::endl;
+    if (hasProcessed())
+    {
+      return;
+    }
+    Node nf_term = mkConcat(d_normal_forms[eqc]);
+    std::map<Node, Node>::iterator itn = nf_to_eqc.find(nf_term);
+    if (itn != nf_to_eqc.end())
+    {
+      // two equivalence classes have same normal form, merge
+      std::vector<Node> nf_exp;
+      nf_exp.push_back(mkAnd(d_normal_forms_exp[eqc]));
+      nf_exp.push_back(eqc_to_exp[itn->second]);
+      Node eq =
+          d_normal_forms_base[eqc].eqNode(d_normal_forms_base[itn->second]);
+      sendInference(nf_exp, eq, "Normal_Form");
       if( hasProcessed() ){
         return;
-      }else{
-        Node nf_term = mkConcat( d_normal_forms[eqc] );
-        std::map< Node, Node >::iterator itn = nf_to_eqc.find( nf_term );
-        if( itn!=nf_to_eqc.end() ){
-          //two equivalence classes have same normal form, merge
-          std::vector< Node > nf_exp;
-          nf_exp.push_back( mkAnd( d_normal_forms_exp[eqc] ) );
-          nf_exp.push_back( eqc_to_exp[itn->second] );
-          Node eq = d_normal_forms_base[eqc].eqNode( d_normal_forms_base[itn->second] );
-          sendInference( nf_exp, eq, "Normal_Form" );
-        } else {
-          nf_to_eqc[nf_term] = eqc;
-          eqc_to_nf[eqc] = nf_term;
-          eqc_to_exp[eqc] = mkAnd( d_normal_forms_exp[eqc] );
-        }
       }
-      Trace("strings-process-debug") << "Done verifying normal forms are the same for " << eqc << std::endl;
     }
-    if( !hasProcessed() ){
-      if(Trace.isOn("strings-nf")) {
-        Trace("strings-nf") << "**** Normal forms are : " << std::endl;
-        for( std::map< Node, Node >::iterator it = eqc_to_exp.begin(); it != eqc_to_exp.end(); ++it ){
-          Trace("strings-nf") << "  N[" << it->first << "] (base " << d_normal_forms_base[it->first] << ") = " << eqc_to_nf[it->first] << std::endl;
-          Trace("strings-nf") << "     exp: " << it->second << std::endl;
+    else
+    {
+      nf_to_eqc[nf_term] = eqc;
+      eqc_to_nf[eqc] = nf_term;
+      eqc_to_exp[eqc] = mkAnd(d_normal_forms_exp[eqc]);
+    }
+    Trace("strings-process-debug")
+        << "Done verifying normal forms are the same for " << eqc << std::endl;
+  }
+  if (Trace.isOn("strings-nf"))
+  {
+    Trace("strings-nf") << "**** Normal forms are : " << std::endl;
+    for (std::map<Node, Node>::iterator it = eqc_to_exp.begin();
+         it != eqc_to_exp.end();
+         ++it)
+    {
+      Trace("strings-nf") << "  N[" << it->first << "] (base "
+                          << d_normal_forms_base[it->first]
+                          << ") = " << eqc_to_nf[it->first] << std::endl;
+      Trace("strings-nf") << "     exp: " << it->second << std::endl;
+    }
+    Trace("strings-nf") << std::endl;
+  }
+  checkExtfEval(1);
+  Trace("strings-process-debug")
+      << "Done check extended functions re-eval, addedFact = "
+      << !d_pending.empty() << " " << !d_lemma_cache.empty()
+      << ", d_conflict = " << d_conflict << std::endl;
+  if (hasProcessed())
+  {
+    return;
+  }
+  if (!options::stringEagerLen())
+  {
+    checkLengthsEqc();
+    if (hasProcessed())
+    {
+      return;
+    }
+  }
+  // process disequalities between equivalence classes
+  checkDeqNF();
+  Trace("strings-process-debug")
+      << "Done check disequalities, addedFact = " << !d_pending.empty() << " "
+      << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
+  if (hasProcessed())
+  {
+    return;
+  }
+  // ensure that lemmas regarding str.code been added for each constant string
+  // of length one
+  if (d_has_str_code)
+  {
+    NodeManager* nm = NodeManager::currentNM();
+    // str.code applied to the code term for each equivalence class that has a
+    // code term but is not a constant
+    std::vector<Node> nconst_codes;
+    // str.code applied to the proxy variables for each equivalence classes that
+    // are constants of size one
+    std::vector<Node> const_codes;
+    for (const Node& eqc : d_strings_eqc)
+    {
+      if (d_normal_forms[eqc].size() == 1 && d_normal_forms[eqc][0].isConst())
+      {
+        Node c = d_normal_forms[eqc][0];
+        Trace("strings-code-debug") << "Get proxy variable for " << c
+                                    << std::endl;
+        Node cc = nm->mkNode(kind::STRING_CODE, c);
+        cc = Rewriter::rewrite(cc);
+        Assert(cc.isConst());
+        NodeNodeMap::const_iterator it = d_proxy_var.find(c);
+        AlwaysAssert(it != d_proxy_var.end());
+        Node vc = nm->mkNode(kind::STRING_CODE, (*it).second);
+        if (!areEqual(cc, vc))
+        {
+          sendInference(d_empty_vec, cc.eqNode(vc), "Code_Proxy");
         }
-        Trace("strings-nf") << std::endl;
+        const_codes.push_back(vc);
       }
-      checkExtfEval( 1 );
-      Trace("strings-process-debug") << "Done check extended functions re-eval, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
-      if( !hasProcessed() ){
-        if( !options::stringEagerLen() ){
-          checkLengthsEqc();
-          if( hasProcessed() ){
-            return;
-          }
+      else
+      {
+        EqcInfo* ei = getOrMakeEqcInfo(eqc, false);
+        if (ei && !ei->d_code_term.get().isNull())
+        {
+          Node vc = nm->mkNode(kind::STRING_CODE, ei->d_code_term.get());
+          nconst_codes.push_back(vc);
+        }
+      }
+    }
+    if (hasProcessed())
+    {
+      return;
+    }
+    // now, ensure that str.code is injective
+    std::vector<Node> cmps;
+    cmps.insert(cmps.end(), const_codes.rbegin(), const_codes.rend());
+    cmps.insert(cmps.end(), nconst_codes.rbegin(), nconst_codes.rend());
+    for (unsigned i = 0, num_ncc = nconst_codes.size(); i < num_ncc; i++)
+    {
+      Node c1 = nconst_codes[i];
+      cmps.pop_back();
+      for (const Node& c2 : cmps)
+      {
+        Trace("strings-code-debug") << "Compare codes : " << c1 << " " << c2
+                                    << std::endl;
+        if (!areDisequal(c1, c2))
+        {
+          Node eqn = c1[0].eqNode(c2[0]);
+          Node eq = c1.eqNode(c2);
+          Node inj_lem = nm->mkNode(kind::OR, eq.negate(), eqn);
+          sendInference(d_empty_vec, inj_lem, "Code_Inj");
         }
-        //process disequalities between equivalence classes
-        checkDeqNF();
-        Trace("strings-process-debug") << "Done check disequalities, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
       }
     }
-    Trace("strings-solve") << "Finished check normal forms, #lemmas = " << d_lemma_cache.size() << ", conflict = " << d_conflict << std::endl;
   }
+  Trace("strings-process-debug")
+      << "Done check code, addedFact = " << !d_pending.empty() << " "
+      << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
+  Trace("strings-solve") << "Finished check normal forms, #lemmas = "
+                         << d_lemma_cache.size()
+                         << ", conflict = " << d_conflict << std::endl;
 }
 
 //compute d_normal_forms_(base,exp,exp_depend)[eqc]
@@ -3278,21 +3443,25 @@ bool TheoryStrings::isNormalFormPair2( Node n1, Node n2 ) {
 }
 
 void TheoryStrings::registerTerm( Node n, int effort ) {
-  // 0 : upon preregistration or internal assertion
-  // 1 : upon occurrence in length term
-  // 2 : before normal form computation
-  // 3 : called on normal form terms
-  bool do_register = false;
-  if( options::stringEagerLen() ){
-    do_register = effort==0;
-  }else{
-    do_register = effort>0 || n.getKind()!=kind::STRING_CONCAT;
+  TypeNode tn = n.getType();
+  bool do_register = true;
+  if (!tn.isString())
+  {
+    if (options::stringEagerLen())
+    {
+      do_register = effort == 0;
+    }
+    else
+    {
+      do_register = effort > 0 || n.getKind() != kind::STRING_CONCAT;
+    }
   }
   if( do_register ){
     if(d_registered_terms_cache.find(n) == d_registered_terms_cache.end()) {
       d_registered_terms_cache.insert(n);
       Debug("strings-register") << "TheoryStrings::registerTerm() " << n << ", effort = " << effort << std::endl;
-      if(n.getType().isString()) {
+      if (tn.isString())
+      {
         //register length information:
         //  for variables, split on empty vs positive length
         //  for concat/const/replace, introduce proxy var and state length relation
@@ -3344,10 +3513,25 @@ void TheoryStrings::registerTerm( Node n, int effort ) {
           Trace("strings-lemma-debug") << "  prerewrite : " << skl.eqNode( lsum ) << std::endl;
           Trace("strings-assert") << "(assert " << ceq << ")" << std::endl;
           d_out->lemma(ceq);
-          
         }
-      } else {
-        AlwaysAssert(false, "String Terms only in registerTerm.");
+      }
+      else if (n.getKind() == kind::STRING_CODE)
+      {
+        d_has_str_code = true;
+        NodeManager* nm = NodeManager::currentNM();
+        // ite( str.len(s)==1, 0 <= str.code(s) < num_codes, str.code(s)=-1 )
+        Node neg_one = nm->mkConst(Rational(-1));
+        Node code_len = mkLength(n[0]).eqNode(d_one);
+        Node code_eq_neg1 = n.eqNode(nm->mkConst(Rational(-1)));
+        Node code_range = nm->mkNode(
+            kind::AND,
+            nm->mkNode(kind::GEQ, n, d_zero),
+            nm->mkNode(
+                kind::LT, n, nm->mkConst(Rational(CVC4::String::num_codes()))));
+        Node lem = nm->mkNode(kind::ITE, code_len, code_range, code_eq_neg1);
+        Trace("strings-lemma") << "Strings::Lemma CODE : " << lem << std::endl;
+        Trace("strings-assert") << "(assert " << lem << ")" << std::endl;
+        d_out->lemma(lem);
       }
     }
   }
diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h
index 22406adef..85d2754a8 100644
--- a/src/theory/strings/theory_strings.h
+++ b/src/theory/strings/theory_strings.h
@@ -294,18 +294,31 @@ private:
   EqualityStatus getEqualityStatus(TNode a, TNode b) override;
 
  private:
+  /** SAT-context-dependent information about an equivalence class */
   class EqcInfo {
   public:
     EqcInfo( context::Context* c );
     ~EqcInfo(){}
-    //constant in this eqc
+    /**
+     * If non-null, this is a term x from this eq class such that str.len( x )
+     * occurs as a term in this SAT context.
+     */
     context::CDO< Node > d_length_term;
+    /**
+     * If non-null, this is a term x from this eq class such that str.code( x )
+     * occurs as a term in this SAT context.
+     */
+    context::CDO<Node> d_code_term;
     context::CDO< unsigned > d_cardinality_lem_k;
-    // 1 = added length lemma
     context::CDO< Node > d_normalized_length;
   };
   /** map from representatives to information necessary for equivalence classes */
   std::map< Node, EqcInfo* > d_eqc_info;
+  /**
+   * Get the above information for equivalence class eqc. If doMake is true,
+   * we construct a new information class if one does not exist. The term eqc
+   * should currently be a representative of the equality engine of this class.
+   */
   EqcInfo * getOrMakeEqcInfo( Node eqc, bool doMake = true );
   //maintain which concat terms have the length lemma instantiated
   NodeNodeMap d_proxy_var;
@@ -315,6 +328,8 @@ private:
 private:
   //any non-reduced extended functions exist
   context::CDO< bool > d_has_extf;
+  /** have we asserted any str.code terms? */
+  bool d_has_str_code;
   // static information about extf
   class ExtfInfo {
   public:
@@ -448,7 +463,24 @@ private:
   void addToExplanation(Node a, Node b, std::vector<Node>& exp);
   void addToExplanation(Node lit, std::vector<Node>& exp);
 
-  // register term
+  /** Register term
+   *
+   * This performs SAT-context-independent registration for a term n, which
+   * may cause lemmas to be sent on the output channel that involve
+   * "initial refinement lemmas" for n. This includes introducing proxy
+   * variables for string terms and asserting that str.code terms are within
+   * proper bounds.
+   *
+   * Effort is one of the following (TODO make enum #1881):
+   * 0 : upon preregistration or internal assertion
+   * 1 : upon occurrence in length term
+   * 2 : before normal form computation
+   * 3 : called on normal form terms
+   *
+   * Based on the strategy, we may choose to add these initial refinement
+   * lemmas at one of the following efforts, where if it is not the given
+   * effort, the call to this method does nothing.
+   */
   void registerTerm(Node n, int effort);
   // send lemma
   void sendInference(std::vector<Node>& exp,
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp
index 1c885434c..cfd0f6e73 100644
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -1224,6 +1224,10 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) {
   } else if(node.getKind() == kind::STRING_IN_REGEXP) {
     retNode = rewriteMembership(node);
   }
+  else if (node.getKind() == STRING_CODE)
+  {
+    retNode = rewriteStringCode(node);
+  }
 
   Trace("strings-postrewrite") << "Strings::postRewrite returning " << retNode << std::endl;
   if( orig!=retNode ){
@@ -2258,6 +2262,30 @@ Node TheoryStringsRewriter::rewritePrefixSuffix(Node n)
   return retNode;
 }
 
+Node TheoryStringsRewriter::rewriteStringCode(Node n)
+{
+  Assert(n.getKind() == kind::STRING_CODE);
+  if (n[0].isConst())
+  {
+    CVC4::String s = n[0].getConst<String>();
+    Node ret;
+    if (s.size() == 1)
+    {
+      std::vector<unsigned> vec = s.getVec();
+      Assert(vec.size() == 1);
+      ret = NodeManager::currentNM()->mkConst(
+          Rational(CVC4::String::convertUnsignedIntToCode(vec[0])));
+    }
+    else
+    {
+      ret = NodeManager::currentNM()->mkConst(Rational(-1));
+    }
+    return returnRewrite(n, ret, "code-eval");
+  }
+
+  return n;
+}
+
 void TheoryStringsRewriter::getConcat( Node n, std::vector< Node >& c ) {
   if( n.getKind()==kind::STRING_CONCAT || n.getKind()==kind::REGEXP_CONCAT ){
     for( unsigned i=0; i<n.getNumChildren(); i++ ){
diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h
index 9671fa815..f7e65f3a9 100644
--- a/src/theory/strings/theory_strings_rewriter.h
+++ b/src/theory/strings/theory_strings_rewriter.h
@@ -109,6 +109,12 @@ private:
   * Returns the rewritten form of node.
   */
   static Node rewritePrefixSuffix(Node node);
+  /** rewrite str.code
+   * This is the entry point for post-rewriting terms n of the form
+   *   str.code( t )
+   * Returns the rewritten form of node.
+   */
+  static Node rewriteStringCode(Node node);
 
   /** gets the "vector form" of term n, adds it to c.
   * For example:
diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h
index 03267abcd..b4629b338 100644
--- a/src/theory/strings/theory_strings_type_rules.h
+++ b/src/theory/strings/theory_strings_type_rules.h
@@ -228,7 +228,9 @@ public:
     if( check ) {
       TypeNode t = n[0].getType(check);
       if (!t.isString()) {
-        throw TypeCheckingExceptionPrivate(n, "expecting a string term in string to int 0");
+        std::stringstream ss;
+        ss << "expecting a string term in argument of " << n.getKind();
+        throw TypeCheckingExceptionPrivate(n, ss.str());
       }
     }
     return nodeManager->integerType();
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp
index 9aaad522a..4cbda5147 100644
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -32,6 +32,32 @@ namespace CVC4 {
 
 static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
 
+unsigned String::convertCharToUnsignedInt(unsigned char c)
+{
+  return convertCodeToUnsignedInt(static_cast<unsigned>(c));
+}
+unsigned char String::convertUnsignedIntToChar(unsigned i)
+{
+  Assert(i < num_codes());
+  return static_cast<unsigned char>(convertUnsignedIntToCode(i));
+}
+bool String::isPrintable(unsigned i)
+{
+  Assert(i < num_codes());
+  unsigned char c = convertUnsignedIntToChar(i);
+  return (c >= ' ' && c <= '~');
+}
+unsigned String::convertCodeToUnsignedInt(unsigned c)
+{
+  Assert(c < num_codes());
+  return (c < start_code() ? c + num_codes() : c) - start_code();
+}
+unsigned String::convertUnsignedIntToCode(unsigned i)
+{
+  Assert(i < num_codes());
+  return (i + start_code()) % num_codes();
+}
+
 int String::cmp(const String &y) const {
   if (size() != y.size()) {
     return size() < y.size() ? -1 : 1;
diff --git a/src/util/regexp.h b/src/util/regexp.h
index 42d232259..c8b491475 100644
--- a/src/util/regexp.h
+++ b/src/util/regexp.h
@@ -28,21 +28,43 @@
 
 namespace CVC4 {
 
+/** The CVC4 string class
+ *
+ * This data structure is the domain of values for the string type. It can also
+ * be used as a generic utility for representing strings.
+ */
 class CVC4_PUBLIC String {
  public:
-  static unsigned convertCharToUnsignedInt(unsigned char c) {
-    unsigned i = c;
-    i = i + 191;
-    return (i >= 256 ? i - 256 : i);
-  }
-  static unsigned char convertUnsignedIntToChar(unsigned i) {
-    unsigned ii = i + 65;
-    return (unsigned char)(ii >= 256 ? ii - 256 : ii);
-  }
-  static bool isPrintable(unsigned i) {
-    unsigned char c = convertUnsignedIntToChar(i);
-    return (c >= ' ' && c <= '~');  // isprint( (int)c );
-  }
+  /**
+   * The start ASCII code. In our string representation below, we represent
+   * characters using a vector d_vec of unsigned integers. We refer to this as
+   * the "internal representation" for the string.
+   *
+   * We make unsigned integer 0 correspond to the 65th character ("A") in the
+   * ASCII alphabet to make models intuitive. In particular, say if we have
+   * a set of string variables that are distinct but otherwise unconstrained,
+   * then the model may assign them "A", "B", "C", ...
+   */
+  static inline unsigned start_code() { return 65; }
+  /**
+   * This is the cardinality of the alphabet that is representable by this
+   * class. Notice that this must be greater than or equal to the cardinality
+   * of the alphabet that the string theory reasons about.
+   */
+  static inline unsigned num_codes() { return 256; }
+  /**
+   * Convert unsigned char to the unsigned used in the internal representation
+   * in d_vec below.
+   */
+  static unsigned convertCharToUnsignedInt(unsigned char c);
+  /** Convert the internal unsigned to a unsigned char. */
+  static unsigned char convertUnsignedIntToChar(unsigned i);
+  /** Does the internal unsigned correspond to a printable character? */
+  static bool isPrintable(unsigned i);
+  /** get the internal unsigned for ASCII code c. */
+  static unsigned convertCodeToUnsignedInt(unsigned c);
+  /** get the ASCII code number that internal unsigned i corresponds to. */
+  static unsigned convertUnsignedIntToCode(unsigned i);
 
   /** constructors for String
   *
diff --git a/test/regress/Makefile.tests b/test/regress/Makefile.tests
index 0a0732753..8584eeca9 100644
--- a/test/regress/Makefile.tests
+++ b/test/regress/Makefile.tests
@@ -1410,6 +1410,7 @@ REG1_TESTS = \
 	regress1/strings/cmu-5042-0707-2.smt2 \
 	regress1/strings/cmu-inc-nlpp-071516.smt2 \
 	regress1/strings/cmu-substr-rw.smt2 \
+	regress1/strings/code-sequence.smt2 \
 	regress1/strings/crash-1019.smt2 \
 	regress1/strings/csp-prefix-exp-bug.smt2 \
 	regress1/strings/double-replace.smt2 \
@@ -1452,6 +1453,10 @@ REG1_TESTS = \
 	regress1/strings/string-unsound-sem.smt2 \
 	regress1/strings/strings-index-empty.smt2 \
 	regress1/strings/strip-endpt-sound.smt2 \
+	regress1/strings/str-code-sat.smt2 \
+	regress1/strings/str-code-unsat.smt2 \
+	regress1/strings/str-code-unsat-2.smt2 \
+	regress1/strings/str-code-unsat-3.smt2 \
 	regress1/strings/substr001.smt2 \
 	regress1/strings/type002.smt2 \
 	regress1/strings/type003.smt2 \
diff --git a/test/regress/regress1/strings/code-sequence.smt2 b/test/regress/regress1/strings/code-sequence.smt2
new file mode 100644
index 000000000..2654017d4
--- /dev/null
+++ b/test/regress/regress1/strings/code-sequence.smt2
@@ -0,0 +1,14 @@
+(set-logic SLIA)
+(set-option :strings-exp true)
+(set-option :strings-fmf true)
+(set-option :fmf-bound true)
+(set-info :status sat)
+(declare-fun x () String)
+(assert (forall ((n Int)) (=> (and (<= 0 n) (< n (str.len x))) 
+(and
+(<= 60 (str.code (str.at x n)))
+(<= (str.code (str.at x n)) 90)
+(< (+ 1 (str.code (str.at x (- n 1)))) (str.code (str.at x n)))
+))))
+(assert (> (str.len x) 3))
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-sat.smt2 b/test/regress/regress1/strings/str-code-sat.smt2
new file mode 100644
index 000000000..1acc091c1
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-sat.smt2
@@ -0,0 +1,24 @@
+(set-logic QF_SLIA)
+(set-info :status sat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+(declare-fun w () String)
+
+(assert (>= (str.code x) 65))
+(assert (<= (str.code x) 75))
+
+(assert (>= (str.code y) 65))
+(assert (<= (str.code y) 75))
+
+(assert (>= (str.code z) 65))
+(assert (<= (str.code z) 75))
+
+(assert (= (str.len w) 1))
+
+(assert (= (+ (str.code x) (str.code y)) 140))
+(assert (= (+ (str.code x) (str.code z)) 141))
+
+(assert (distinct x y z w "A" "B" "C" "D" "AA"))
+
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-unsat-2.smt2 b/test/regress/regress1/strings/str-code-unsat-2.smt2
new file mode 100644
index 000000000..38116061e
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-unsat-2.smt2
@@ -0,0 +1,6 @@
+(set-logic QF_SLIA)
+(set-info :status unsat)
+(declare-fun x () String)
+(assert (= (str.len x) 1))
+(assert (or (< (str.code x) 0) (> (str.code x) 10000000000000000000000000000)))
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-unsat-3.smt2 b/test/regress/regress1/strings/str-code-unsat-3.smt2
new file mode 100644
index 000000000..fa34785c2
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-unsat-3.smt2
@@ -0,0 +1,21 @@
+(set-logic QF_SLIA)
+(set-info :status unsat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+
+(assert (>= (str.code x) 65))
+(assert (<= (str.code x) 75))
+
+(assert (>= (str.code y) 65))
+(assert (<= (str.code y) 75))
+
+(assert (>= (str.code z) 65))
+(assert (<= (str.code z) 75))
+
+(assert (= (+ (str.code x) (str.code y)) 140))
+(assert (= (+ (str.code x) (str.code z)) 141))
+
+(assert (distinct x y z "B" "C" "D" "E"))
+
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-unsat.smt2 b/test/regress/regress1/strings/str-code-unsat.smt2
new file mode 100644
index 000000000..6a3e9062b
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-unsat.smt2
@@ -0,0 +1,21 @@
+(set-logic QF_SLIA)
+(set-info :status unsat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+
+(assert (>= (str.code x) 65))
+(assert (<= (str.code x) 75))
+
+(assert (>= (str.code y) 65))
+(assert (<= (str.code y) 75))
+
+(assert (>= (str.code z) 65))
+(assert (<= (str.code z) 75))
+
+(assert (= (+ (str.code x) (str.code y)) 140))
+(assert (= (+ (str.code x) (str.code z)) 140))
+
+(assert (distinct x y z))
+
+(check-sat)
-- 
2.30.2