Add support for str.code (#1821)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Mon, 7 May 2018 15:22:27 +0000 (10:22 -0500)

committer GitHub <noreply@github.com>

Mon, 7 May 2018 15:22:27 +0000 (10:22 -0500)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Mon, 7 May 2018 15:22:27 +0000 (10:22 -0500)
committer GitHub <noreply@github.com>
Mon, 7 May 2018 15:22:27 +0000 (10:22 -0500)
diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp

index 1d66ab0c1819be919489032a94c993fc3a6e7c22..09dccdfbdf96a68fdbfaceb5ff7a1d1499a13684 100644 (file)
--- a/src/parser/smt2/smt2.cpp
+++ b/src/parser/smt2/smt2.cpp
@@ -153,6 +153,7 @@ void Smt2::addStringOperators() {
    addOperator(kind::REGEXP_OPT, "re.opt");
    addOperator(kind::REGEXP_RANGE, "re.range");
    addOperator(kind::REGEXP_LOOP, "re.loop");
+  addOperator(kind::STRING_CODE, "str.code");
  }
  
  void Smt2::addFloatingPointOperators() {
diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp

index 96bee9724eb37a3ad3d9239bfbd2fe4091219334..2c6a2633527e32f6a6d3d0291ff417d790de5801 100644 (file)
--- a/src/printer/smt2/smt2_printer.cpp
+++ b/src/printer/smt2/smt2_printer.cpp
@@ -523,6 +523,7 @@ void Smt2Printer::toStream(std::ostream& out,
    case kind::STRING_SUFFIX:
    case kind::STRING_ITOS:
    case kind::STRING_STOI:
+  case kind::STRING_CODE:
    case kind::STRING_TO_REGEXP:
    case kind::REGEXP_CONCAT:
    case kind::REGEXP_UNION:
diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds

index 15dd5b423f39f0cd0d3b3841d351e2f1609943a9..34237f69e777ec2f284248cfdbd4369c1fe9d795 100644 (file)
--- a/src/theory/strings/kinds
+++ b/src/theory/strings/kinds
@@ -24,6 +24,7 @@ operator STRING_PREFIX 2 "string prefixof"
  operator STRING_SUFFIX 2 "string suffixof"
  operator STRING_ITOS 1 "integer to string"
  operator STRING_STOI 1 "string to integer (total function)"
+operator STRING_CODE 1 "string to code, returns the code of the first character of the string if it has length one, -1 otherwise"
  
  #sort CHAR_TYPE \
  #    Cardinality::INTEGERS \
@@ -103,6 +104,7 @@ typerule STRING_PREFIX ::CVC4::theory::strings::StringPrefixOfTypeRule
  typerule STRING_SUFFIX ::CVC4::theory::strings::StringSuffixOfTypeRule
  typerule STRING_ITOS ::CVC4::theory::strings::StringIntToStrTypeRule
  typerule STRING_STOI ::CVC4::theory::strings::StringStrToIntTypeRule
+typerule STRING_CODE ::CVC4::theory::strings::StringStrToIntTypeRule
  
  typerule STRING_IN_REGEXP ::CVC4::theory::strings::StringInRegExpTypeRule
  
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp

index c780caca2229910a082ecbeb6755a5f84a55ae13..b77a616b36e95b3bb3e8bef1db355c5cd00a5501 100644 (file)
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -32,6 +32,7 @@
  
  using namespace std;
  using namespace CVC4::context;
+using namespace CVC4::kind;
  
  namespace CVC4 {
  namespace theory {
@@ -73,13 +74,14 @@ Node TheoryStrings::TermIndex::add( TNode n, unsigned index, TheoryStrings* t, N
    }
  }
  
-
-TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u,
-                             OutputChannel& out, Valuation valuation,
+TheoryStrings::TheoryStrings(context::Context* c,
+                             context::UserContext* u,
+                             OutputChannel& out,
+                             Valuation valuation,
                               const LogicInfo& logicInfo)
      : Theory(THEORY_STRINGS, c, u, out, valuation, logicInfo),
        RMAXINT(LONG_MAX),
-      d_notify( *this ),
+      d_notify(*this),
        d_equalityEngine(d_notify, c, "theory::strings", true),
        d_conflict(c, false),
        d_infer(c),
@@ -98,7 +100,8 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u,
        d_proxy_var(u),
        d_proxy_var_to_length(u),
        d_functionsTerms(c),
-      d_has_extf(c, false ),
+      d_has_extf(c, false),
+      d_has_str_code(false),
        d_regexp_memberships(c),
        d_regexp_ucached(u),
        d_regexp_ccached(c),
@@ -121,11 +124,13 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u,
    getExtTheory()->addFunctionKind(kind::STRING_STRREPL);
    getExtTheory()->addFunctionKind(kind::STRING_STRCTN);
    getExtTheory()->addFunctionKind(kind::STRING_IN_REGEXP);
+  getExtTheory()->addFunctionKind(kind::STRING_CODE);
  
    // The kinds we are treating as function application in congruence
-  d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP);
    d_equalityEngine.addFunctionKind(kind::STRING_LENGTH);
    d_equalityEngine.addFunctionKind(kind::STRING_CONCAT);
+  d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP);
+  d_equalityEngine.addFunctionKind(kind::STRING_CODE);
    if( options::stringLazyPreproc() ){
      d_equalityEngine.addFunctionKind(kind::STRING_STRCTN);
      d_equalityEngine.addFunctionKind(kind::STRING_SUBSTR);
@@ -410,7 +415,9 @@ int TheoryStrings::getReduction( int effort, Node n, Node& nr ) {
        if( d_preproc_cache.find( c_n )==d_preproc_cache.end() ){
          d_preproc_cache[ c_n ] = true;
          Trace("strings-process-debug") << "Process reduction for " << n << ", pol = " << pol << std::endl;
-        if( n.getKind()==kind::STRING_STRCTN && pol==1 ){
+        Kind k = n.getKind();
+        if (k == kind::STRING_STRCTN && pol == 1)
+        {
            Node x = n[0];
            Node s = n[1];
            //positive contains reduces to a equality
@@ -423,9 +430,13 @@ int TheoryStrings::getReduction( int effort, Node n, Node& nr ) {
            //we've reduced this n
            Trace("strings-extf-debug") << "  resolve extf : " << n << " based on positive contain reduction." << std::endl;
            return 1;
-        }else{
-          // for STRING_SUBSTR, STRING_STRCTN with pol=-1,
-          //     STRING_STRIDOF, STRING_ITOS, STRING_STOI, STRING_STRREPL
+        }
+        else if (k != kind::STRING_CODE)
+        {
+          Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF
+                 || k == STRING_ITOS
+                 || k == STRING_STOI
+                 || k == STRING_STRREPL);
            std::vector< Node > new_nodes;
            Node res = d_preproc.simplify( n, new_nodes );
            Assert( res!=n );
@@ -478,6 +489,7 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
      return false;
    }
  
+  NodeManager* nm = NodeManager::currentNM();
    // Generate model
    std::vector< Node > nodes;
    getEquivalenceClasses( nodes );
@@ -521,20 +533,45 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
    ////step 2 : assign arbitrary values for unknown lengths?
    // confirmed by calculus invariant, see paper
    Trace("strings-model") << "Assign to equivalence classes..." << std::endl;
+  std::map<Node, Node> pure_eq_assign;
    //step 3 : assign values to equivalence classes that are pure variables
    for( unsigned i=0; i<col.size(); i++ ){
      std::vector< Node > pure_eq;
      Trace("strings-model") << "The equivalence classes ";
-    for( unsigned j=0; j<col[i].size(); j++ ) {
-      Trace("strings-model") << col[i][j] << " ";
+    for (const Node& eqc : col[i])
+    {
+      Trace("strings-model") << eqc << " ";
        //check if col[i][j] has only variables
-      if( !col[i][j].isConst() ){
-        Assert( d_normal_forms.find( col[i][j] )!=d_normal_forms.end() );
-        if( d_normal_forms[col[i][j]].size()==1 ){//&& d_normal_forms[col[i][j]][0]==col[i][j] ){
-          pure_eq.push_back( col[i][j] );
+      if (!eqc.isConst())
+      {
+        Assert(d_normal_forms.find(eqc) != d_normal_forms.end());
+        if (d_normal_forms[eqc].size() == 1)
+        {
+          // does it have a code?
+          if (d_has_str_code)
+          {
+            EqcInfo* eip = getOrMakeEqcInfo(eqc, false);
+            if (eip && !eip->d_code_term.get().isNull())
+            {
+              // its value must be equal to its code
+              Node ct = nm->mkNode(kind::STRING_CODE, eip->d_code_term.get());
+              Node ctv = d_valuation.getModelValue(ct);
+              unsigned cvalue =
+                  ctv.getConst<Rational>().getNumerator().toUnsignedInt();
+              Trace("strings-model") << "(code: " << cvalue << ") ";
+              std::vector<unsigned> vec;
+              vec.push_back(String::convertCodeToUnsignedInt(cvalue));
+              Node mv = nm->mkConst(String(vec));
+              pure_eq_assign[eqc] = mv;
+              m->getEqualityEngine()->addTerm( mv );
+            }
+          }
+          pure_eq.push_back(eqc);
          }
-      }else{
-        processed[col[i][j]] = col[i][j];
+      }
+      else
+      {
+        processed[eqc] = eqc;
        }
      }
      Trace("strings-model") << "have length " << lts_values[i] << std::endl;
@@ -547,7 +584,7 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
            lvalue++;
          }
          Trace("strings-model") << "*** Decide to make length of " << lvalue << std::endl;
-        lts_values[i] = NodeManager::currentNM()->mkConst( Rational( lvalue ) );
+        lts_values[i] = nm->mkConst(Rational(lvalue));
          values_used[ lvalue ] = true;
        }
        Trace("strings-model") << "Need to assign values of length " << lts_values[i] << " to equivalence classes ";
@@ -556,22 +593,33 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
        }
        Trace("strings-model") << std::endl;
  
-
        //use type enumerator
        Assert(lts_values[i].getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in string model");
        StringEnumeratorLength sel(lts_values[i].getConst<Rational>().getNumerator().toUnsignedInt());
-      for( unsigned j=0; j<pure_eq.size(); j++ ){
-        Assert( !sel.isFinished() );
-        Node c = *sel;
-        while( d_equalityEngine.hasTerm( c ) ){
-          ++sel;
+      for (const Node& eqc : pure_eq)
+      {
+        Node c;
+        std::map<Node, Node>::iterator itp = pure_eq_assign.find(eqc);
+        if (itp == pure_eq_assign.end())
+        {
            Assert( !sel.isFinished() );
            c = *sel;
+          while (m->hasTerm(c))
+          {
+            ++sel;
+            Assert(!sel.isFinished());
+            c = *sel;
+          }
+          ++sel;
+        }
+        else
+        {
+          c = itp->second;
          }
-        ++sel;
-        Trace("strings-model") << "*** Assigned constant " << c << " for " << pure_eq[j] << std::endl;
-        processed[pure_eq[j]] = c;
-        if (!m->assertEquality(pure_eq[j], c, true))
+        Trace("strings-model") << "*** Assigned constant " << c << " for "
+                               << eqc << std::endl;
+        processed[eqc] = c;
+        if (!m->assertEquality(eqc, c, true))
          {
            return false;
          }
@@ -645,9 +693,9 @@ void TheoryStrings::preRegisterTerm(TNode n) {
          break;
        }
        default: {
+        registerTerm(n, 0);
          TypeNode tn = n.getType();
          if( tn.isString() ) {
-          registerTerm( n, 0 );
            // if finite model finding is enabled,
            // then we minimize the length of this term if it is a variable
            // but not an internally generated Skolem, or a term that does
@@ -840,8 +888,12 @@ void TheoryStrings::checkExtfReductions( int effort ) {
    }
  }
  
-TheoryStrings::EqcInfo::EqcInfo(  context::Context* c ) : d_length_term(c), d_cardinality_lem_k(c), d_normalized_length(c) {
-
+TheoryStrings::EqcInfo::EqcInfo(context::Context* c)
+    : d_length_term(c),
+      d_code_term(c),
+      d_cardinality_lem_k(c),
+      d_normalized_length(c)
+{
  }
  
  TheoryStrings::EqcInfo * TheoryStrings::getOrMakeEqcInfo( Node eqc, bool doMake ) {
@@ -874,11 +926,20 @@ void TheoryStrings::conflict(TNode a, TNode b){
  
  /** called when a new equivalance class is created */
  void TheoryStrings::eqNotifyNewClass(TNode t){
-  if( t.getKind() == kind::STRING_LENGTH ){
+  Kind k = t.getKind();
+  if (k == kind::STRING_LENGTH || k == kind::STRING_CODE)
+  {
      Trace("strings-debug") << "New length eqc : " << t << std::endl;
      Node r = d_equalityEngine.getRepresentative(t[0]);
      EqcInfo * ei = getOrMakeEqcInfo( r, true );
-    ei->d_length_term = t[0];
+    if (k == kind::STRING_LENGTH)
+    {
+      ei->d_length_term = t[0];
+    }
+    else
+    {
+      ei->d_code_term = t[0];
+    }
      //we care about the length of this string
      registerTerm( t[0], 1 );
    }else{
@@ -895,6 +956,10 @@ void TheoryStrings::eqNotifyPreMerge(TNode t1, TNode t2){
      if( !e2->d_length_term.get().isNull() ){
        e1->d_length_term.set( e2->d_length_term );
      }
+    if (!e2->d_code_term.get().isNull())
+    {
+      e1->d_code_term.set(e2->d_code_term);
+    }
      if( e2->d_cardinality_lem_k.get()>e1->d_cardinality_lem_k.get() ) {
        e1->d_cardinality_lem_k.set( e2->d_cardinality_lem_k );
      }
@@ -2013,64 +2078,164 @@ void TheoryStrings::checkNormalForms(){
        }
      }
    }
-  if( !hasProcessed() ){
-    Trace("strings-process") << "Normalize equivalence classes...." << std::endl;
-    //calculate normal forms for each equivalence class, possibly adding splitting lemmas
-    d_normal_forms.clear();
-    d_normal_forms_exp.clear();
-    std::map< Node, Node > nf_to_eqc;
-    std::map< Node, Node > eqc_to_nf;
-    std::map< Node, Node > eqc_to_exp;
-    for( unsigned i=0; i<d_strings_eqc.size(); i++ ) {
-      Node eqc = d_strings_eqc[i];
-      Trace("strings-process-debug") << "- Verify normal forms are the same for " << eqc << std::endl;
-      normalizeEquivalenceClass( eqc );
-      Trace("strings-debug") << "Finished normalizing eqc..." << std::endl;
+  if (hasProcessed())
+  {
+    return;
+  }
+  Trace("strings-process") << "Normalize equivalence classes...." << std::endl;
+  // calculate normal forms for each equivalence class, possibly adding
+  // splitting lemmas
+  d_normal_forms.clear();
+  d_normal_forms_exp.clear();
+  std::map<Node, Node> nf_to_eqc;
+  std::map<Node, Node> eqc_to_nf;
+  std::map<Node, Node> eqc_to_exp;
+  for (const Node& eqc : d_strings_eqc)
+  {
+    Trace("strings-process-debug") << "- Verify normal forms are the same for "
+                                   << eqc << std::endl;
+    normalizeEquivalenceClass(eqc);
+    Trace("strings-debug") << "Finished normalizing eqc..." << std::endl;
+    if (hasProcessed())
+    {
+      return;
+    }
+    Node nf_term = mkConcat(d_normal_forms[eqc]);
+    std::map<Node, Node>::iterator itn = nf_to_eqc.find(nf_term);
+    if (itn != nf_to_eqc.end())
+    {
+      // two equivalence classes have same normal form, merge
+      std::vector<Node> nf_exp;
+      nf_exp.push_back(mkAnd(d_normal_forms_exp[eqc]));
+      nf_exp.push_back(eqc_to_exp[itn->second]);
+      Node eq =
+          d_normal_forms_base[eqc].eqNode(d_normal_forms_base[itn->second]);
+      sendInference(nf_exp, eq, "Normal_Form");
        if( hasProcessed() ){
          return;
-      }else{
-        Node nf_term = mkConcat( d_normal_forms[eqc] );
-        std::map< Node, Node >::iterator itn = nf_to_eqc.find( nf_term );
-        if( itn!=nf_to_eqc.end() ){
-          //two equivalence classes have same normal form, merge
-          std::vector< Node > nf_exp;
-          nf_exp.push_back( mkAnd( d_normal_forms_exp[eqc] ) );
-          nf_exp.push_back( eqc_to_exp[itn->second] );
-          Node eq = d_normal_forms_base[eqc].eqNode( d_normal_forms_base[itn->second] );
-          sendInference( nf_exp, eq, "Normal_Form" );
-        } else {
-          nf_to_eqc[nf_term] = eqc;
-          eqc_to_nf[eqc] = nf_term;
-          eqc_to_exp[eqc] = mkAnd( d_normal_forms_exp[eqc] );
-        }
        }
-      Trace("strings-process-debug") << "Done verifying normal forms are the same for " << eqc << std::endl;
      }
-    if( !hasProcessed() ){
-      if(Trace.isOn("strings-nf")) {
-        Trace("strings-nf") << "**** Normal forms are : " << std::endl;
-        for( std::map< Node, Node >::iterator it = eqc_to_exp.begin(); it != eqc_to_exp.end(); ++it ){
-          Trace("strings-nf") << "  N[" << it->first << "] (base " << d_normal_forms_base[it->first] << ") = " << eqc_to_nf[it->first] << std::endl;
-          Trace("strings-nf") << "     exp: " << it->second << std::endl;
+    else
+    {
+      nf_to_eqc[nf_term] = eqc;
+      eqc_to_nf[eqc] = nf_term;
+      eqc_to_exp[eqc] = mkAnd(d_normal_forms_exp[eqc]);
+    }
+    Trace("strings-process-debug")
+        << "Done verifying normal forms are the same for " << eqc << std::endl;
+  }
+  if (Trace.isOn("strings-nf"))
+  {
+    Trace("strings-nf") << "**** Normal forms are : " << std::endl;
+    for (std::map<Node, Node>::iterator it = eqc_to_exp.begin();
+         it != eqc_to_exp.end();
+         ++it)
+    {
+      Trace("strings-nf") << "  N[" << it->first << "] (base "
+                          << d_normal_forms_base[it->first]
+                          << ") = " << eqc_to_nf[it->first] << std::endl;
+      Trace("strings-nf") << "     exp: " << it->second << std::endl;
+    }
+    Trace("strings-nf") << std::endl;
+  }
+  checkExtfEval(1);
+  Trace("strings-process-debug")
+      << "Done check extended functions re-eval, addedFact = "
+      << !d_pending.empty() << " " << !d_lemma_cache.empty()
+      << ", d_conflict = " << d_conflict << std::endl;
+  if (hasProcessed())
+  {
+    return;
+  }
+  if (!options::stringEagerLen())
+  {
+    checkLengthsEqc();
+    if (hasProcessed())
+    {
+      return;
+    }
+  }
+  // process disequalities between equivalence classes
+  checkDeqNF();
+  Trace("strings-process-debug")
+      << "Done check disequalities, addedFact = " << !d_pending.empty() << " "
+      << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
+  if (hasProcessed())
+  {
+    return;
+  }
+  // ensure that lemmas regarding str.code been added for each constant string
+  // of length one
+  if (d_has_str_code)
+  {
+    NodeManager* nm = NodeManager::currentNM();
+    // str.code applied to the code term for each equivalence class that has a
+    // code term but is not a constant
+    std::vector<Node> nconst_codes;
+    // str.code applied to the proxy variables for each equivalence classes that
+    // are constants of size one
+    std::vector<Node> const_codes;
+    for (const Node& eqc : d_strings_eqc)
+    {
+      if (d_normal_forms[eqc].size() == 1 && d_normal_forms[eqc][0].isConst())
+      {
+        Node c = d_normal_forms[eqc][0];
+        Trace("strings-code-debug") << "Get proxy variable for " << c
+                                    << std::endl;
+        Node cc = nm->mkNode(kind::STRING_CODE, c);
+        cc = Rewriter::rewrite(cc);
+        Assert(cc.isConst());
+        NodeNodeMap::const_iterator it = d_proxy_var.find(c);
+        AlwaysAssert(it != d_proxy_var.end());
+        Node vc = nm->mkNode(kind::STRING_CODE, (*it).second);
+        if (!areEqual(cc, vc))
+        {
+          sendInference(d_empty_vec, cc.eqNode(vc), "Code_Proxy");
          }
-        Trace("strings-nf") << std::endl;
+        const_codes.push_back(vc);
        }
-      checkExtfEval( 1 );
-      Trace("strings-process-debug") << "Done check extended functions re-eval, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
-      if( !hasProcessed() ){
-        if( !options::stringEagerLen() ){
-          checkLengthsEqc();
-          if( hasProcessed() ){
-            return;
-          }
+      else
+      {
+        EqcInfo* ei = getOrMakeEqcInfo(eqc, false);
+        if (ei && !ei->d_code_term.get().isNull())
+        {
+          Node vc = nm->mkNode(kind::STRING_CODE, ei->d_code_term.get());
+          nconst_codes.push_back(vc);
+        }
+      }
+    }
+    if (hasProcessed())
+    {
+      return;
+    }
+    // now, ensure that str.code is injective
+    std::vector<Node> cmps;
+    cmps.insert(cmps.end(), const_codes.rbegin(), const_codes.rend());
+    cmps.insert(cmps.end(), nconst_codes.rbegin(), nconst_codes.rend());
+    for (unsigned i = 0, num_ncc = nconst_codes.size(); i < num_ncc; i++)
+    {
+      Node c1 = nconst_codes[i];
+      cmps.pop_back();
+      for (const Node& c2 : cmps)
+      {
+        Trace("strings-code-debug") << "Compare codes : " << c1 << " " << c2
+                                    << std::endl;
+        if (!areDisequal(c1, c2))
+        {
+          Node eqn = c1[0].eqNode(c2[0]);
+          Node eq = c1.eqNode(c2);
+          Node inj_lem = nm->mkNode(kind::OR, eq.negate(), eqn);
+          sendInference(d_empty_vec, inj_lem, "Code_Inj");
          }
-        //process disequalities between equivalence classes
-        checkDeqNF();
-        Trace("strings-process-debug") << "Done check disequalities, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
        }
      }
-    Trace("strings-solve") << "Finished check normal forms, #lemmas = " << d_lemma_cache.size() << ", conflict = " << d_conflict << std::endl;
    }
+  Trace("strings-process-debug")
+      << "Done check code, addedFact = " << !d_pending.empty() << " "
+      << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
+  Trace("strings-solve") << "Finished check normal forms, #lemmas = "
+                         << d_lemma_cache.size()
+                         << ", conflict = " << d_conflict << std::endl;
  }
  
  //compute d_normal_forms_(base,exp,exp_depend)[eqc]
@@ -3278,21 +3443,25 @@ bool TheoryStrings::isNormalFormPair2( Node n1, Node n2 ) {
  }
  
  void TheoryStrings::registerTerm( Node n, int effort ) {
-  // 0 : upon preregistration or internal assertion
-  // 1 : upon occurrence in length term
-  // 2 : before normal form computation
-  // 3 : called on normal form terms
-  bool do_register = false;
-  if( options::stringEagerLen() ){
-    do_register = effort==0;
-  }else{
-    do_register = effort>0 || n.getKind()!=kind::STRING_CONCAT;
+  TypeNode tn = n.getType();
+  bool do_register = true;
+  if (!tn.isString())
+  {
+    if (options::stringEagerLen())
+    {
+      do_register = effort == 0;
+    }
+    else
+    {
+      do_register = effort > 0 || n.getKind() != kind::STRING_CONCAT;
+    }
    }
    if( do_register ){
      if(d_registered_terms_cache.find(n) == d_registered_terms_cache.end()) {
        d_registered_terms_cache.insert(n);
        Debug("strings-register") << "TheoryStrings::registerTerm() " << n << ", effort = " << effort << std::endl;
-      if(n.getType().isString()) {
+      if (tn.isString())
+      {
          //register length information:
          //  for variables, split on empty vs positive length
          //  for concat/const/replace, introduce proxy var and state length relation
@@ -3344,10 +3513,25 @@ void TheoryStrings::registerTerm( Node n, int effort ) {
            Trace("strings-lemma-debug") << "  prerewrite : " << skl.eqNode( lsum ) << std::endl;
            Trace("strings-assert") << "(assert " << ceq << ")" << std::endl;
            d_out->lemma(ceq);
-          
          }
-      } else {
-        AlwaysAssert(false, "String Terms only in registerTerm.");
+      }
+      else if (n.getKind() == kind::STRING_CODE)
+      {
+        d_has_str_code = true;
+        NodeManager* nm = NodeManager::currentNM();
+        // ite( str.len(s)==1, 0 <= str.code(s) < num_codes, str.code(s)=-1 )
+        Node neg_one = nm->mkConst(Rational(-1));
+        Node code_len = mkLength(n[0]).eqNode(d_one);
+        Node code_eq_neg1 = n.eqNode(nm->mkConst(Rational(-1)));
+        Node code_range = nm->mkNode(
+            kind::AND,
+            nm->mkNode(kind::GEQ, n, d_zero),
+            nm->mkNode(
+                kind::LT, n, nm->mkConst(Rational(CVC4::String::num_codes()))));
+        Node lem = nm->mkNode(kind::ITE, code_len, code_range, code_eq_neg1);
+        Trace("strings-lemma") << "Strings::Lemma CODE : " << lem << std::endl;
+        Trace("strings-assert") << "(assert " << lem << ")" << std::endl;
+        d_out->lemma(lem);
        }
      }
    }
diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h

index 22406adef3a16352ea0a214a5c1a8b1c625066dc..85d2754a8fd02c2f8b608b93c7a676d9728a515b 100644 (file)
--- a/src/theory/strings/theory_strings.h
+++ b/src/theory/strings/theory_strings.h
@@ -294,18 +294,31 @@ private:
    EqualityStatus getEqualityStatus(TNode a, TNode b) override;
  
   private:
+  /** SAT-context-dependent information about an equivalence class */
    class EqcInfo {
    public:
      EqcInfo( context::Context* c );
      ~EqcInfo(){}
-    //constant in this eqc
+    /**
+     * If non-null, this is a term x from this eq class such that str.len( x )
+     * occurs as a term in this SAT context.
+     */
      context::CDO< Node > d_length_term;
+    /**
+     * If non-null, this is a term x from this eq class such that str.code( x )
+     * occurs as a term in this SAT context.
+     */
+    context::CDO<Node> d_code_term;
      context::CDO< unsigned > d_cardinality_lem_k;
-    // 1 = added length lemma
      context::CDO< Node > d_normalized_length;
    };
    /** map from representatives to information necessary for equivalence classes */
    std::map< Node, EqcInfo* > d_eqc_info;
+  /**
+   * Get the above information for equivalence class eqc. If doMake is true,
+   * we construct a new information class if one does not exist. The term eqc
+   * should currently be a representative of the equality engine of this class.
+   */
    EqcInfo * getOrMakeEqcInfo( Node eqc, bool doMake = true );
    //maintain which concat terms have the length lemma instantiated
    NodeNodeMap d_proxy_var;
@@ -315,6 +328,8 @@ private:
  private:
    //any non-reduced extended functions exist
    context::CDO< bool > d_has_extf;
+  /** have we asserted any str.code terms? */
+  bool d_has_str_code;
    // static information about extf
    class ExtfInfo {
    public:
@@ -448,7 +463,24 @@ private:
    void addToExplanation(Node a, Node b, std::vector<Node>& exp);
    void addToExplanation(Node lit, std::vector<Node>& exp);
  
-  // register term
+  /** Register term
+   *
+   * This performs SAT-context-independent registration for a term n, which
+   * may cause lemmas to be sent on the output channel that involve
+   * "initial refinement lemmas" for n. This includes introducing proxy
+   * variables for string terms and asserting that str.code terms are within
+   * proper bounds.
+   *
+   * Effort is one of the following (TODO make enum #1881):
+   * 0 : upon preregistration or internal assertion
+   * 1 : upon occurrence in length term
+   * 2 : before normal form computation
+   * 3 : called on normal form terms
+   *
+   * Based on the strategy, we may choose to add these initial refinement
+   * lemmas at one of the following efforts, where if it is not the given
+   * effort, the call to this method does nothing.
+   */
    void registerTerm(Node n, int effort);
    // send lemma
    void sendInference(std::vector<Node>& exp,
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp

index 1c885434cfd149e5b50af17b9151516f8a4ed918..cfd0f6e73fd06eea320a1cf5d54acf674ed0cd29 100644 (file)
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -1224,6 +1224,10 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) {
    } else if(node.getKind() == kind::STRING_IN_REGEXP) {
      retNode = rewriteMembership(node);
    }
+  else if (node.getKind() == STRING_CODE)
+  {
+    retNode = rewriteStringCode(node);
+  }
  
    Trace("strings-postrewrite") << "Strings::postRewrite returning " << retNode << std::endl;
    if( orig!=retNode ){
@@ -2258,6 +2262,30 @@ Node TheoryStringsRewriter::rewritePrefixSuffix(Node n)
    return retNode;
  }
  
+Node TheoryStringsRewriter::rewriteStringCode(Node n)
+{
+  Assert(n.getKind() == kind::STRING_CODE);
+  if (n[0].isConst())
+  {
+    CVC4::String s = n[0].getConst<String>();
+    Node ret;
+    if (s.size() == 1)
+    {
+      std::vector<unsigned> vec = s.getVec();
+      Assert(vec.size() == 1);
+      ret = NodeManager::currentNM()->mkConst(
+          Rational(CVC4::String::convertUnsignedIntToCode(vec[0])));
+    }
+    else
+    {
+      ret = NodeManager::currentNM()->mkConst(Rational(-1));
+    }
+    return returnRewrite(n, ret, "code-eval");
+  }
+
+  return n;
+}
+
  void TheoryStringsRewriter::getConcat( Node n, std::vector< Node >& c ) {
    if( n.getKind()==kind::STRING_CONCAT || n.getKind()==kind::REGEXP_CONCAT ){
      for( unsigned i=0; i<n.getNumChildren(); i++ ){
diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h

index 9671fa815430d0772c715e87237f5fecbee38187..f7e65f3a9914d5fbcb6b7f31802f0e5571e3747d 100644 (file)
--- a/src/theory/strings/theory_strings_rewriter.h
+++ b/src/theory/strings/theory_strings_rewriter.h
@@ -109,6 +109,12 @@ private:
    * Returns the rewritten form of node.
    */
    static Node rewritePrefixSuffix(Node node);
+  /** rewrite str.code
+   * This is the entry point for post-rewriting terms n of the form
+   *   str.code( t )
+   * Returns the rewritten form of node.
+   */
+  static Node rewriteStringCode(Node node);
  
    /** gets the "vector form" of term n, adds it to c.
    * For example:
diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h

index 03267abcda26e690bbdd33b9c0b09eac995c2948..b4629b3383b7c7a914452f44abf6364e5104f859 100644 (file)
--- a/src/theory/strings/theory_strings_type_rules.h
+++ b/src/theory/strings/theory_strings_type_rules.h
@@ -228,7 +228,9 @@ public:
      if( check ) {
        TypeNode t = n[0].getType(check);
        if (!t.isString()) {
-        throw TypeCheckingExceptionPrivate(n, "expecting a string term in string to int 0");
+        std::stringstream ss;
+        ss << "expecting a string term in argument of " << n.getKind();
+        throw TypeCheckingExceptionPrivate(n, ss.str());
        }
      }
      return nodeManager->integerType();
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp

index 9aaad522a234ed78525f360d0716ea98db0f2808..4cbda514763c75ef1e980ca6f29f759b5cfc30b9 100644 (file)
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -32,6 +32,32 @@ namespace CVC4 {
  
  static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
  
+unsigned String::convertCharToUnsignedInt(unsigned char c)
+{
+  return convertCodeToUnsignedInt(static_cast<unsigned>(c));
+}
+unsigned char String::convertUnsignedIntToChar(unsigned i)
+{
+  Assert(i < num_codes());
+  return static_cast<unsigned char>(convertUnsignedIntToCode(i));
+}
+bool String::isPrintable(unsigned i)
+{
+  Assert(i < num_codes());
+  unsigned char c = convertUnsignedIntToChar(i);
+  return (c >= ' ' && c <= '~');
+}
+unsigned String::convertCodeToUnsignedInt(unsigned c)
+{
+  Assert(c < num_codes());
+  return (c < start_code() ? c + num_codes() : c) - start_code();
+}
+unsigned String::convertUnsignedIntToCode(unsigned i)
+{
+  Assert(i < num_codes());
+  return (i + start_code()) % num_codes();
+}
+
  int String::cmp(const String &y) const {
    if (size() != y.size()) {
      return size() < y.size() ? -1 : 1;
diff --git a/src/util/regexp.h b/src/util/regexp.h

index 42d23225901b68167b98297db1ef19c53d523827..c8b491475ce5584f0f626cf094f08b0477241f4d 100644 (file)
--- a/src/util/regexp.h
+++ b/src/util/regexp.h
@@ -28,21 +28,43 @@
  
  namespace CVC4 {
  
+/** The CVC4 string class
+ *
+ * This data structure is the domain of values for the string type. It can also
+ * be used as a generic utility for representing strings.
+ */
  class CVC4_PUBLIC String {
   public:
-  static unsigned convertCharToUnsignedInt(unsigned char c) {
-    unsigned i = c;
-    i = i + 191;
-    return (i >= 256 ? i - 256 : i);
-  }
-  static unsigned char convertUnsignedIntToChar(unsigned i) {
-    unsigned ii = i + 65;
-    return (unsigned char)(ii >= 256 ? ii - 256 : ii);
-  }
-  static bool isPrintable(unsigned i) {
-    unsigned char c = convertUnsignedIntToChar(i);
-    return (c >= ' ' && c <= '~');  // isprint( (int)c );
-  }
+  /**
+   * The start ASCII code. In our string representation below, we represent
+   * characters using a vector d_vec of unsigned integers. We refer to this as
+   * the "internal representation" for the string.
+   *
+   * We make unsigned integer 0 correspond to the 65th character ("A") in the
+   * ASCII alphabet to make models intuitive. In particular, say if we have
+   * a set of string variables that are distinct but otherwise unconstrained,
+   * then the model may assign them "A", "B", "C", ...
+   */
+  static inline unsigned start_code() { return 65; }
+  /**
+   * This is the cardinality of the alphabet that is representable by this
+   * class. Notice that this must be greater than or equal to the cardinality
+   * of the alphabet that the string theory reasons about.
+   */
+  static inline unsigned num_codes() { return 256; }
+  /**
+   * Convert unsigned char to the unsigned used in the internal representation
+   * in d_vec below.
+   */
+  static unsigned convertCharToUnsignedInt(unsigned char c);
+  /** Convert the internal unsigned to a unsigned char. */
+  static unsigned char convertUnsignedIntToChar(unsigned i);
+  /** Does the internal unsigned correspond to a printable character? */
+  static bool isPrintable(unsigned i);
+  /** get the internal unsigned for ASCII code c. */
+  static unsigned convertCodeToUnsignedInt(unsigned c);
+  /** get the ASCII code number that internal unsigned i corresponds to. */
+  static unsigned convertUnsignedIntToCode(unsigned i);
  
    /** constructors for String
    *
diff --git a/test/regress/Makefile.tests b/test/regress/Makefile.tests

index 0a0732753fab01129a5170279c77deca2283fee4..8584eeca9067451d2443ba68000f588e41ee5eb5 100644 (file)
--- a/test/regress/Makefile.tests
+++ b/test/regress/Makefile.tests
@@ -1410,6 +1410,7 @@ REG1_TESTS = \
         regress1/strings/cmu-5042-0707-2.smt2 \
         regress1/strings/cmu-inc-nlpp-071516.smt2 \
         regress1/strings/cmu-substr-rw.smt2 \
+       regress1/strings/code-sequence.smt2 \
         regress1/strings/crash-1019.smt2 \
         regress1/strings/csp-prefix-exp-bug.smt2 \
         regress1/strings/double-replace.smt2 \
@@ -1452,6 +1453,10 @@ REG1_TESTS = \
         regress1/strings/string-unsound-sem.smt2 \
         regress1/strings/strings-index-empty.smt2 \
         regress1/strings/strip-endpt-sound.smt2 \
+       regress1/strings/str-code-sat.smt2 \
+       regress1/strings/str-code-unsat.smt2 \
+       regress1/strings/str-code-unsat-2.smt2 \
+       regress1/strings/str-code-unsat-3.smt2 \
         regress1/strings/substr001.smt2 \
         regress1/strings/type002.smt2 \
         regress1/strings/type003.smt2 \
diff --git a/test/regress/regress1/strings/code-sequence.smt2 b/test/regress/regress1/strings/code-sequence.smt2

new file mode 100644 (file)

index 0000000..2654017
--- /dev/null
+++ b/test/regress/regress1/strings/code-sequence.smt2
@@ -0,0 +1,14 @@
+(set-logic SLIA)
+(set-option :strings-exp true)
+(set-option :strings-fmf true)
+(set-option :fmf-bound true)
+(set-info :status sat)
+(declare-fun x () String)
+(assert (forall ((n Int)) (=> (and (<= 0 n) (< n (str.len x))) 
+(and
+(<= 60 (str.code (str.at x n)))
+(<= (str.code (str.at x n)) 90)
+(< (+ 1 (str.code (str.at x (- n 1)))) (str.code (str.at x n)))
+))))
+(assert (> (str.len x) 3))
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-sat.smt2 b/test/regress/regress1/strings/str-code-sat.smt2

new file mode 100644 (file)

index 0000000..1acc091
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-sat.smt2
@@ -0,0 +1,24 @@
+(set-logic QF_SLIA)
+(set-info :status sat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+(declare-fun w () String)
+
+(assert (>= (str.code x) 65))
+(assert (<= (str.code x) 75))
+
+(assert (>= (str.code y) 65))
+(assert (<= (str.code y) 75))
+
+(assert (>= (str.code z) 65))
+(assert (<= (str.code z) 75))
+
+(assert (= (str.len w) 1))
+
+(assert (= (+ (str.code x) (str.code y)) 140))
+(assert (= (+ (str.code x) (str.code z)) 141))
+
+(assert (distinct x y z w "A" "B" "C" "D" "AA"))
+
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-unsat-2.smt2 b/test/regress/regress1/strings/str-code-unsat-2.smt2

new file mode 100644 (file)

index 0000000..3811606
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-unsat-2.smt2
@@ -0,0 +1,6 @@
+(set-logic QF_SLIA)
+(set-info :status unsat)
+(declare-fun x () String)
+(assert (= (str.len x) 1))
+(assert (or (< (str.code x) 0) (> (str.code x) 10000000000000000000000000000)))
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-unsat-3.smt2 b/test/regress/regress1/strings/str-code-unsat-3.smt2

new file mode 100644 (file)

index 0000000..fa34785
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-unsat-3.smt2
@@ -0,0 +1,21 @@
+(set-logic QF_SLIA)
+(set-info :status unsat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+
+(assert (>= (str.code x) 65))
+(assert (<= (str.code x) 75))
+
+(assert (>= (str.code y) 65))
+(assert (<= (str.code y) 75))
+
+(assert (>= (str.code z) 65))
+(assert (<= (str.code z) 75))
+
+(assert (= (+ (str.code x) (str.code y)) 140))
+(assert (= (+ (str.code x) (str.code z)) 141))
+
+(assert (distinct x y z "B" "C" "D" "E"))
+
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-unsat.smt2 b/test/regress/regress1/strings/str-code-unsat.smt2

new file mode 100644 (file)

index 0000000..6a3e906
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-unsat.smt2
@@ -0,0 +1,21 @@
+(set-logic QF_SLIA)
+(set-info :status unsat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+
+(assert (>= (str.code x) 65))
+(assert (<= (str.code x) 75))
+
+(assert (>= (str.code y) 65))
+(assert (<= (str.code y) 75))
+
+(assert (>= (str.code z) 65))
+(assert (<= (str.code z) 75))
+
+(assert (= (+ (str.code x) (str.code y)) 140))
+(assert (= (+ (str.code x) (str.code z)) 140))
+
+(assert (distinct x y z))
+
+(check-sat)
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Mon, 7 May 2018 15:22:27 +0000 (10:22 -0500)
committer	GitHub <noreply@github.com>
	Mon, 7 May 2018 15:22:27 +0000 (10:22 -0500)
src/parser/smt2/smt2.cpp		patch \| blob \| history
src/printer/smt2/smt2_printer.cpp		patch \| blob \| history
src/theory/strings/kinds		patch \| blob \| history
src/theory/strings/theory_strings.cpp		patch \| blob \| history
src/theory/strings/theory_strings.h		patch \| blob \| history
src/theory/strings/theory_strings_rewriter.cpp		patch \| blob \| history
src/theory/strings/theory_strings_rewriter.h		patch \| blob \| history
src/theory/strings/theory_strings_type_rules.h		patch \| blob \| history
src/util/regexp.cpp		patch \| blob \| history
src/util/regexp.h		patch \| blob \| history
test/regress/Makefile.tests		patch \| blob \| history
test/regress/regress1/strings/code-sequence.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/str-code-sat.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/str-code-unsat-2.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/str-code-unsat-3.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/str-code-unsat.smt2	[new file with mode: 0644]	patch \| blob