Support unicode internal representation and escape sequences (#3852)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Fri, 27 Mar 2020 14:01:38 +0000 (09:01 -0500)

committer GitHub <noreply@github.com>

Fri, 27 Mar 2020 14:01:38 +0000 (09:01 -0500)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Fri, 27 Mar 2020 14:01:38 +0000 (09:01 -0500)
committer GitHub <noreply@github.com>
Fri, 27 Mar 2020 14:01:38 +0000 (09:01 -0500)
diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g

index 82c0581cebec6316c56427b463109b92075b42be..03338961067c4eac7d3e7f983cee3351612276e9 100644 (file)
--- a/src/parser/cvc/Cvc.g
+++ b/src/parser/cvc/Cvc.g
@@ -2083,7 +2083,7 @@ stringTerm[CVC4::api::Term& f]
  
      /* string literal */
    | str[s]
-    { f = SOLVER->mkString(s, true); }
+    { f = PARSER_STATE->mkStringConstant(s); }
  
    | setsTerm[f]
    ;
diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp

index b36f36a935b0d55c5977a77f097be0dade5dae22..5dca923708c3f3e48c4460204269b7a56c753ee5 100644 (file)
--- a/src/parser/parser.cpp
+++ b/src/parser/parser.cpp
@@ -757,5 +757,153 @@ void Parser::attributeNotSupported(const std::string& attr) {
    }
  }
  
+std::vector<unsigned> Parser::processAdHocStringEsc(const std::string& s)
+{
+  std::vector<unsigned> str;
+  unsigned i = 0;
+  while (i < s.size())
+  {
+    // get the current character
+    if (s[i] != '\\')
+    {
+      // don't worry about printable here
+      str.push_back(static_cast<unsigned>(s[i]));
+      ++i;
+      continue;
+    }
+    // slash is always escaped
+    ++i;
+    if (i >= s.size())
+    {
+      // slash cannot be the last character if we are parsing escape sequences
+      std::stringstream serr;
+      serr << "Escape sequence at the end of string: \"" << s
+           << "\" should be handled by lexer";
+      parseError(serr.str());
+    }
+    switch (s[i])
+    {
+      case 'n':
+      {
+        str.push_back(static_cast<unsigned>('\n'));
+        i++;
+      }
+      break;
+      case 't':
+      {
+        str.push_back(static_cast<unsigned>('\t'));
+        i++;
+      }
+      break;
+      case 'v':
+      {
+        str.push_back(static_cast<unsigned>('\v'));
+        i++;
+      }
+      break;
+      case 'b':
+      {
+        str.push_back(static_cast<unsigned>('\b'));
+        i++;
+      }
+      break;
+      case 'r':
+      {
+        str.push_back(static_cast<unsigned>('\r'));
+        i++;
+      }
+      break;
+      case 'f':
+      {
+        str.push_back(static_cast<unsigned>('\f'));
+        i++;
+      }
+      break;
+      case 'a':
+      {
+        str.push_back(static_cast<unsigned>('\a'));
+        i++;
+      }
+      break;
+      case '\\':
+      {
+        str.push_back(static_cast<unsigned>('\\'));
+        i++;
+      }
+      break;
+      case 'x':
+      {
+        bool isValid = false;
+        if (i + 2 < s.size())
+        {
+          if (std::isxdigit(s[i + 1]) && std::isxdigit(s[i + 2]))
+          {
+            std::stringstream shex;
+            shex << s[i + 1] << s[i + 2];
+            unsigned val;
+            shex >> std::hex >> val;
+            str.push_back(val);
+            i += 3;
+            isValid = true;
+          }
+        }
+        if (!isValid)
+        {
+          std::stringstream serr;
+          serr << "Illegal String Literal: \"" << s
+               << "\", must have two digits after \\x";
+          parseError(serr.str());
+        }
+      }
+      break;
+      default:
+      {
+        if (std::isdigit(s[i]))
+        {
+          // octal escape sequences  TODO : revisit (issue #1251).
+          unsigned num = static_cast<unsigned>(s[i]) - 48;
+          bool flag = num < 4;
+          if (i + 1 < s.size() && num < 8 && std::isdigit(s[i + 1])
+              && s[i + 1] < '8')
+          {
+            num = num * 8 + static_cast<unsigned>(s[i + 1]) - 48;
+            if (flag && i + 2 < s.size() && std::isdigit(s[i + 2])
+                && s[i + 2] < '8')
+            {
+              num = num * 8 + static_cast<unsigned>(s[i + 2]) - 48;
+              str.push_back(num);
+              i += 3;
+            }
+            else
+            {
+              str.push_back(num);
+              i += 2;
+            }
+          }
+          else
+          {
+            str.push_back(num);
+            i++;
+          }
+        }
+      }
+    }
+  }
+  return str;
+}
+
+Expr Parser::mkStringConstant(const std::string& s)
+{
+  ExprManager* em = d_solver->getExprManager();
+  if (em->getOptions().getInputLanguage()
+      == language::input::LANG_SMTLIB_V2_6_1)
+  {
+    return d_solver->mkString(s, true).getExpr();
+  }
+  // otherwise, we must process ad-hoc escape sequences
+  std::vector<unsigned> str = processAdHocStringEsc(s);
+  return d_solver->mkString(str).getExpr();
+}
+
  } /* CVC4::parser namespace */
  } /* CVC4 namespace */
diff --git a/src/parser/parser.h b/src/parser/parser.h

index ecea4d3bd5810f59cd8c9afbd6115d503281d198..d6c0e0e158cc7688f36c143f50144e7d6f7396eb 100644 (file)
--- a/src/parser/parser.h
+++ b/src/parser/parser.h
@@ -889,6 +889,28 @@ public:
          name, api::sortVectorToTypes(argTypes));
    }
    //------------------------ end operator overloading
+  /**
+   * Make string constant
+   *
+   * This makes the string constant based on the string s. This may involve
+   * processing ad-hoc escape sequences (if the language is not
+   * SMT-LIB 2.6.1 or higher), or otherwise calling the solver to construct
+   * the string.
+   */
+  Expr mkStringConstant(const std::string& s);
+
+ private:
+  /** ad-hoc string escaping
+   *
+   * Returns the (internal) vector of code points corresponding to processing
+   * the escape sequences in string s. This is to support string inputs that
+   * do no comply with the SMT-LIB standard.
+   *
+   * This method handles escape sequences, including \n, \t, \v, \b, \r, \f, \a,
+   * \\, \x[N] and octal escape sequences of the form \[c1]([c2]([c3])?)? where
+   * c1, c2, c3 are digits from 0 to 7.
+   */
+  std::vector<unsigned> processAdHocStringEsc(const std::string& s);
  };/* class Parser */
  
  }/* CVC4::parser namespace */
diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g

index ec1eae7daa10588ce3861787c9bfe122c3f40c1c..69f21acb779a64110f163e5b02bfc081299e349f 100644 (file)
--- a/src/parser/smt2/Smt2.g
+++ b/src/parser/smt2/Smt2.g
@@ -2091,7 +2091,7 @@ termAtomic[CVC4::api::Term& atomTerm]
      }
  
    // String constant
-  | str[s,false] { atomTerm = SOLVER->mkString(s, true); }
+  | str[s,false] { atomTerm = PARSER_STATE->mkStringConstant(s); }
  
    // NOTE: Theory constants go here
  
diff --git a/src/preprocessing/passes/synth_rew_rules.cpp b/src/preprocessing/passes/synth_rew_rules.cpp

index 7b8e61359fd2baeeb442b9161b6a1e6ba1e8af34..f1e9e39c586ee329df0b223c6b5d11ff1a7566e5 100644 (file)
--- a/src/preprocessing/passes/synth_rew_rules.cpp
+++ b/src/preprocessing/passes/synth_rew_rules.cpp
@@ -169,7 +169,7 @@ PreprocessingPassResult SynthRewRulesPass::applyInternal(
        std::stringstream ssv;
        if (varCounter < 26)
        {
-        ssv << String::convertUnsignedIntToChar(varCounter + 32);
+        ssv << static_cast<char>(varCounter + 61);
        }
        else
        {
diff --git a/src/printer/cvc/cvc_printer.cpp b/src/printer/cvc/cvc_printer.cpp

index cad3c464007731535778d34df9f37835382a7120..1178c7299312134ac0d4ed476383e7e2cbef15c1 100644 (file)
--- a/src/printer/cvc/cvc_printer.cpp
+++ b/src/printer/cvc/cvc_printer.cpp
@@ -160,6 +160,11 @@ void CvcPrinter::toStream(
        toStreamRational(out, n, false);
        break;
      }
+    case kind::CONST_STRING:
+    {
+      out << '"' << n.getConst<String>().toString() << '"';
+      break;
+    }
      case kind::TYPE_CONSTANT:
        switch(TypeConstant tc = n.getConst<TypeConstant>()) {
        case REAL_TYPE:
diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp

index 541827f8999253e899ae6ca32f54753a54761890..6e4fcb63a6db9e48bbb47411d831198265030efb 100644 (file)
--- a/src/printer/smt2/smt2_printer.cpp
+++ b/src/printer/smt2/smt2_printer.cpp
@@ -202,7 +202,7 @@ void Smt2Printer::toStream(std::ostream& out,
      }
  
      case kind::CONST_STRING: {
-      std::string s = n.getConst<String>().toString(true);
+      std::string s = n.getConst<String>().toString();
        out << '"';
        for(size_t i = 0; i < s.size(); ++i) {
          char c = s[i];
diff --git a/src/theory/evaluator.cpp b/src/theory/evaluator.cpp

index b827912d58a5d608edcd1a4eed3fab17b9d10176..646f903f59f06627cc4e0a4197f1a5be217ffcba 100644 (file)
--- a/src/theory/evaluator.cpp
+++ b/src/theory/evaluator.cpp
@@ -626,8 +626,7 @@ EvalResult Evaluator::evalInternal(
            const String& s = results[currNode[0]].d_str;
            if (s.size() == 1)
            {
-            results[currNode] = EvalResult(
-                Rational(String::convertUnsignedIntToCode(s.getVec()[0])));
+            results[currNode] = EvalResult(Rational(s.getVec()[0]));
            }
            else
            {
diff --git a/src/theory/quantifiers/sygus_sampler.cpp b/src/theory/quantifiers/sygus_sampler.cpp

index 28cfa69dfd0514c41872c784fb6a61407f5640f5..e9c8588146a4dcd51bd40302ac3de7791e5b87f3 100644 (file)
--- a/src/theory/quantifiers/sygus_sampler.cpp
+++ b/src/theory/quantifiers/sygus_sampler.cpp
@@ -560,8 +560,7 @@ Node SygusSampler::getRandomValue(TypeNode tn)
        for (unsigned ch : alphas)
        {
          d_rstring_alphabet.push_back(ch);
-        Trace("sygus-sample-str-alpha")
-            << " \"" << String::convertUnsignedIntToChar(ch) << "\"";
+        Trace("sygus-sample-str-alpha") << " \\u" << ch;
        }
        Trace("sygus-sample-str-alpha") << std::endl;
      }
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp

index d5105a489fae9ea4f4190412be92716c0da09c31..9a2091eacb856937854d26aeefc8d185a89f3480 100644 (file)
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -739,9 +739,7 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
        }
        case kind::REGEXP_RANGE: {
          unsigned a = r[0].getConst<String>().front();
-        a = String::convertUnsignedIntToCode(a);
          unsigned b = r[1].getConst<String>().front();
-        b = String::convertUnsignedIntToCode(b);
          Assert(a < b);
          Assert(b < std::numeric_limits<unsigned>::max());
          for (unsigned c = a; c <= b; c++)
@@ -756,7 +754,6 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
            String s = st.getConst<String>();
            if(s.size() != 0) {
              unsigned sc = s.front();
-            sc = String::convertUnsignedIntToCode(sc);
              cset.insert(sc);
            }
          }
@@ -765,7 +762,6 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
            if(st[0].isConst()) {
              String s = st[0].getConst<String>();
              unsigned sc = s.front();
-            sc = String::convertUnsignedIntToCode(sc);
              cset.insert(sc);
            } else {
              vset.insert( st[0] );
@@ -887,13 +883,11 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
        case kind::REGEXP_RANGE: {
          std::vector< Node > vec;
          unsigned a = r[0].getConst<String>().front();
-        a = String::convertUnsignedIntToCode(a);
          unsigned b = r[1].getConst<String>().front();
-        b = String::convertUnsignedIntToCode(b);
          for (unsigned c = a; c <= b; c++)
          {
            std::vector<unsigned> tmpVec;
-          tmpVec.push_back(String::convertCodeToUnsignedInt(c));
+          tmpVec.push_back(c);
            Node tmp = s.eqNode(nm->mkConst(String(tmpVec))).negate();
            vec.push_back( tmp );
          }
@@ -1522,7 +1516,7 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node >
               ++it)
          {
            std::vector<unsigned> cvec;
-          cvec.push_back(String::convertCodeToUnsignedInt(*it));
+          cvec.push_back(*it);
            String c(cvec);
            Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl;
            Node r1l = derivativeSingle(r1, c);
diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp

index 716634d5f3dcf870fd6e933a24b049fbe3fc6dab..b0940b7e120712359aa89246b77a0c3fba47b1de 100644 (file)
--- a/src/theory/strings/sequences_rewriter.cpp
+++ b/src/theory/strings/sequences_rewriter.cpp
@@ -1421,11 +1421,8 @@ bool SequencesRewriter::testConstStringInRegExp(CVC4::String& s,
        if (s.size() == index_start + 1)
        {
          unsigned a = r[0].getConst<String>().front();
-        a = String::convertUnsignedIntToCode(a);
          unsigned b = r[1].getConst<String>().front();
-        b = String::convertUnsignedIntToCode(b);
          unsigned c = s.back();
-        c = String::convertUnsignedIntToCode(c);
          return (a <= c && c <= b);
        }
        else
diff --git a/src/theory/strings/strings_rewriter.cpp b/src/theory/strings/strings_rewriter.cpp

index 75dfe7432ce66a2aa2b7e8447b4ce39ee1ebe06b..c7676d04980c167e7584cba23bbccebb3fca7e45 100644 (file)
--- a/src/theory/strings/strings_rewriter.cpp
+++ b/src/theory/strings/strings_rewriter.cpp
@@ -93,7 +93,7 @@ Node StringsRewriter::rewriteStrConvert(Node node)
      std::vector<unsigned> nvec = node[0].getConst<String>().getVec();
      for (unsigned i = 0, nvsize = nvec.size(); i < nvsize; i++)
      {
-      unsigned newChar = String::convertUnsignedIntToCode(nvec[i]);
+      unsigned newChar = nvec[i];
        // transform it
        // upper 65 ... 90
        // lower 97 ... 122
@@ -111,7 +111,6 @@ Node StringsRewriter::rewriteStrConvert(Node node)
            newChar = newChar + 32;
          }
        }
-      newChar = String::convertCodeToUnsignedInt(newChar);
        nvec[i] = newChar;
      }
      Node retNode = nm->mkConst(String(nvec));
@@ -231,7 +230,7 @@ Node StringsRewriter::rewriteStringToCode(Node n)
      {
        std::vector<unsigned> vec = s.getVec();
        Assert(vec.size() == 1);
-      ret = nm->mkConst(Rational(String::convertUnsignedIntToCode(vec[0])));
+      ret = nm->mkConst(Rational(vec[0]));
      }
      else
      {
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp

index 16183abdd2e85e459d008ef40a03705817d9a4ac..a81c96318d103bd7b238aceec0754266d082848a 100644 (file)
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -382,7 +382,7 @@ bool TheoryStrings::collectModelInfoType(
                    ctv.getConst<Rational>().getNumerator().toUnsignedInt();
                Trace("strings-model") << "(code: " << cvalue << ") ";
                std::vector<unsigned> vec;
-              vec.push_back(String::convertCodeToUnsignedInt(cvalue));
+              vec.push_back(cvalue);
                Node mv = nm->mkConst(String(vec));
                pure_eq_assign[eqc] = mv;
                m->getEqualityEngine()->addTerm(mv);
@@ -1099,13 +1099,14 @@ void TheoryStrings::registerTerm(Node n, int effort)
    else if (n.getKind() == STRING_TO_CODE)
    {
      d_has_str_code = true;
-    // ite( str.len(s)==1, 0 <= str.code(s) < num_codes, str.code(s)=-1 )
+    // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
      Node code_len = utils::mkNLength(n[0]).eqNode(d_one);
      Node code_eq_neg1 = n.eqNode(d_neg_one);
      Node code_range = nm->mkNode(
          AND,
          nm->mkNode(GEQ, n, d_zero),
-        nm->mkNode(LT, n, nm->mkConst(Rational(CVC4::String::num_codes()))));
+        nm->mkNode(
+            LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
      regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
    }
    else if (n.getKind() == STRING_STRIDOF)
diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h

index 7ef31a92a78b5fa6cf63bd9630a0fc227fb842a6..93a32f26eac731fb407f38f0982efb3badef3c24 100644 (file)
--- a/src/theory/strings/theory_strings_type_rules.h
+++ b/src/theory/strings/theory_strings_type_rules.h
@@ -299,7 +299,7 @@ public:
            throw TypeCheckingExceptionPrivate(n, "expecting a single constant string term in regexp range");
          }
          unsigned ci = (*it).getConst<String>().front();
-        ch[i] = String::convertUnsignedIntToCode(ci);
+        ch[i] = ci;
          ++it;
        }
        if(ch[0] > ch[1]) {
diff --git a/src/theory/strings/type_enumerator.cpp b/src/theory/strings/type_enumerator.cpp

index 12cf899b4e91706ee73142e22f62272550f2ca4f..7352ae5deeacf531f7f77c1dd4e1e97cf2247e25 100644 (file)
--- a/src/theory/strings/type_enumerator.cpp
+++ b/src/theory/strings/type_enumerator.cpp
@@ -21,6 +21,44 @@ namespace CVC4 {
  namespace theory {
  namespace strings {
  
+Node makeStandardModelConstant(const std::vector<unsigned>& vec,
+                               uint32_t cardinality)
+{
+  std::vector<unsigned> mvec;
+  // if we contain all of the printable characters
+  if (cardinality >= 255)
+  {
+    for (unsigned i = 0, vsize = vec.size(); i < vsize; i++)
+    {
+      unsigned curr = vec[i];
+      // convert
+      Assert(vec[i] < cardinality);
+      if (vec[i] <= 61)
+      {
+        // first 62 printable characters [\u{65}-\u{126}]: 'A', 'B', 'C', ...
+        curr = vec[i] + 65;
+      }
+      else if (vec[i] <= 94)
+      {
+        // remaining 33 printable characters [\u{32}-\u{64}]: ' ', '!', '"', ...
+        curr = vec[i] - 30;
+      }
+      else
+      {
+        // the remaining characters, starting with \u{127} and wrapping around
+        // the first 32 non-printable characters.
+        curr = (vec[i] + 32) % cardinality;
+      }
+      mvec.push_back(curr);
+    }
+  }
+  else
+  {
+    mvec = vec;
+  }
+  return NodeManager::currentNM()->mkConst(String(mvec));
+}
+
  WordIter::WordIter(uint32_t startLength) : d_hasEndLength(false), d_endLength(0)
  {
    for (uint32_t i = 0; i < startLength; i++)
@@ -117,7 +155,7 @@ bool StringEnumLen::increment()
  
  void StringEnumLen::mkCurr()
  {
-  d_curr = NodeManager::currentNM()->mkConst(String(d_witer->getData()));
+  d_curr = makeStandardModelConstant(d_witer->getData(), d_cardinality);
  }
  
  StringEnumerator::StringEnumerator(TypeNode type, TypeEnumeratorProperties* tep)
diff --git a/src/theory/strings/type_enumerator.h b/src/theory/strings/type_enumerator.h

index 2061628a581281d791eba9a27e8ebea3b920eb7e..b379ce5c3fbb0eeb9896c7d5f9e286e8596d0b9a 100644 (file)
--- a/src/theory/strings/type_enumerator.h
+++ b/src/theory/strings/type_enumerator.h
@@ -27,6 +27,26 @@ namespace CVC4 {
  namespace theory {
  namespace strings {
  
+/**
+ * Make standard model constant
+ *
+ * In our string representation, we represent characters using vectors
+ * of unsigned integers indicating code points for the characters of that
+ * string.
+ *
+ * To make models user-friendly, we make unsigned integer 0 correspond to the
+ * 65th character ("A") in the ASCII alphabet to make models intuitive. In
+ * particular, say if we have a set of string variables that are distinct but
+ * otherwise unconstrained, then the model may assign them "A", "B", "C", ...
+ *
+ * @param vec The code points of the string in a given model,
+ * @param cardinality The cardinality of the alphabet,
+ * @return A string whose characters have the code points corresponding
+ * to vec in the standard model construction described above.
+ */
+Node makeStandardModelConstant(const std::vector<unsigned>& vec,
+                               uint32_t cardinality);
+
  /**
   * Generic iteration over vectors of indices of a given start/end length.
   */
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp

index 00066edb61c8f8ff4be03192823869bf5bb6fc5a..36ba7182b937b1f222381a4a3816c17d45fed67f 100644 (file)
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -32,38 +32,12 @@ namespace CVC4 {
  
  static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
  
-unsigned String::convertCharToUnsignedInt(unsigned char c)
-{
-  return convertCodeToUnsignedInt(static_cast<unsigned>(c));
-}
-unsigned char String::convertUnsignedIntToChar(unsigned i)
-{
-  Assert(i < num_codes());
-  return static_cast<unsigned char>(convertUnsignedIntToCode(i));
-}
-bool String::isPrintable(unsigned i)
-{
-  Assert(i < num_codes());
-  unsigned char c = convertUnsignedIntToChar(i);
-  return (c >= ' ' && c <= '~');
-}
-unsigned String::convertCodeToUnsignedInt(unsigned c)
-{
-  Assert(c < num_codes());
-  return (c < start_code() ? c + num_codes() : c) - start_code();
-}
-unsigned String::convertUnsignedIntToCode(unsigned i)
-{
-  Assert(i < num_codes());
-  return (i + start_code()) % num_codes();
-}
-
  String::String(const std::vector<unsigned> &s) : d_str(s)
  {
  #ifdef CVC4_ASSERTIONS
    for (unsigned u : d_str)
    {
-    Assert(convertUnsignedIntToCode(u) < num_codes());
+    Assert(u < num_codes());
    }
  #endif
  }
@@ -74,8 +48,8 @@ int String::cmp(const String &y) const {
    }
    for (unsigned int i = 0; i < size(); ++i) {
      if (d_str[i] != y.d_str[i]) {
-      unsigned cp = convertUnsignedIntToCode(d_str[i]);
-      unsigned cpy = convertUnsignedIntToCode(y.d_str[i]);
+      unsigned cp = d_str[i];
+      unsigned cpy = y.d_str[i];
        return cp < cpy ? -1 : 1;
      }
    }
@@ -122,107 +96,143 @@ bool String::rstrncmp(const String& y, std::size_t n) const
    return true;
  }
  
-std::vector<unsigned> String::toInternal(const std::string &s,
-                                         bool useEscSequences) {
+void String::addCharToInternal(unsigned char ch, std::vector<unsigned>& str)
+{
+  // if not a printable character
+  if (ch > 127 || ch < 32)
+  {
+    std::stringstream serr;
+    serr << "Illegal string character: \"" << ch
+         << "\", must use escape sequence";
+    throw CVC4::Exception(serr.str());
+  }
+  else
+  {
+    str.push_back(static_cast<unsigned>(ch));
+  }
+}
+
+std::vector<unsigned> String::toInternal(const std::string& s,
+                                         bool useEscSequences)
+{
    std::vector<unsigned> str;
    unsigned i = 0;
-  while (i < s.size()) {
-    if (s[i] == '\\' && useEscSequences) {
-      i++;
-      if (i < s.size()) {
-        switch (s[i]) {
-          case 'n': {
-            str.push_back(convertCharToUnsignedInt('\n'));
-            i++;
-          } break;
-          case 't': {
-            str.push_back(convertCharToUnsignedInt('\t'));
-            i++;
-          } break;
-          case 'v': {
-            str.push_back(convertCharToUnsignedInt('\v'));
-            i++;
-          } break;
-          case 'b': {
-            str.push_back(convertCharToUnsignedInt('\b'));
-            i++;
-          } break;
-          case 'r': {
-            str.push_back(convertCharToUnsignedInt('\r'));
-            i++;
-          } break;
-          case 'f': {
-            str.push_back(convertCharToUnsignedInt('\f'));
-            i++;
-          } break;
-          case 'a': {
-            str.push_back(convertCharToUnsignedInt('\a'));
-            i++;
-          } break;
-          case '\\': {
-            str.push_back(convertCharToUnsignedInt('\\'));
-            i++;
-          } break;
-          case 'x': {
-            if (i + 2 < s.size()) {
-              if (isxdigit(s[i + 1]) && isxdigit(s[i + 2])) {
-                str.push_back(convertCharToUnsignedInt(hexToDec(s[i + 1]) * 16 +
-                                                       hexToDec(s[i + 2])));
-                i += 3;
-              } else {
-                throw CVC4::Exception("Illegal String Literal: \"" + s + "\"");
-              }
-            } else {
-              throw CVC4::Exception("Illegal String Literal: \"" + s +
-                                    "\", must have two digits after \\x");
-            }
-          } break;
-          default: {
-            if (isdigit(s[i])) {
-              // octal escape sequences  TODO : revisit (issue #1251).
-              int num = (int)s[i] - (int)'0';
-              bool flag = num < 4;
-              if (i + 1 < s.size() && num < 8 && isdigit(s[i + 1]) &&
-                  s[i + 1] < '8') {
-                num = num * 8 + (int)s[i + 1] - (int)'0';
-                if (flag && i + 2 < s.size() && isdigit(s[i + 2]) &&
-                    s[i + 2] < '8') {
-                  num = num * 8 + (int)s[i + 2] - (int)'0';
-                  str.push_back(convertCharToUnsignedInt((unsigned char)num));
-                  i += 3;
-                } else {
-                  str.push_back(convertCharToUnsignedInt((unsigned char)num));
-                  i += 2;
-                }
-              } else {
-                str.push_back(convertCharToUnsignedInt((unsigned char)num));
-                i++;
-              }
-            } else if ((unsigned)s[i] > 127) {
-              throw CVC4::Exception("Illegal String Literal: \"" + s +
-                                    "\", must use escaped sequence");
-            } else {
-              str.push_back(convertCharToUnsignedInt(s[i]));
-              i++;
-            }
+  while (i < s.size())
+  {
+    // get the current character
+    char si = s[i];
+    if (si != '\\' || !useEscSequences)
+    {
+      addCharToInternal(si, str);
+      ++i;
+      continue;
+    }
+    // the vector of characters, in case we fail to read an escape sequence
+    std::vector<unsigned> nonEscCache;
+    // process the '\'
+    addCharToInternal(si, nonEscCache);
+    ++i;
+    // are we an escape sequence?
+    bool isEscapeSequence = true;
+    // the string corresponding to the hexidecimal code point
+    std::stringstream hexString;
+    // is the slash followed by a 'u'? Could be last character.
+    if (i >= s.size() || s[i] != 'u')
+    {
+      isEscapeSequence = false;
+    }
+    else
+    {
+      // process the 'u'
+      addCharToInternal(s[i], nonEscCache);
+      ++i;
+      bool isStart = true;
+      bool isEnd = false;
+      bool hasBrace = false;
+      while (i < s.size())
+      {
+        // add the next character
+        si = s[i];
+        if (isStart)
+        {
+          isStart = false;
+          // possibly read '{'
+          if (si == '{')
+          {
+            hasBrace = true;
+            addCharToInternal(si, nonEscCache);
+            ++i;
+            continue;
            }
          }
-      } else {
-        throw CVC4::Exception("should be handled by lexer: \"" + s + "\"");
-        // str.push_back( convertCharToUnsignedInt('\\') );
+        else if (si == '}')
+        {
+          // can only end if we had an open brace and read at least one digit
+          isEscapeSequence = hasBrace && !hexString.str().empty();
+          isEnd = true;
+          addCharToInternal(si, nonEscCache);
+          ++i;
+          break;
+        }
+        // must be a hex digit at this point
+        if (!isHexDigit(static_cast<unsigned>(si)))
+        {
+          isEscapeSequence = false;
+          break;
+        }
+        hexString << si;
+        addCharToInternal(si, nonEscCache);
+        ++i;
+        if (!hasBrace && hexString.str().size() == 4)
+        {
+          // will be finished reading \ u d_3 d_2 d_1 d_0 with no parens
+          isEnd = true;
+          break;
+        }
+        else if (hasBrace && hexString.str().size() > 5)
+        {
+          // too many digits enclosed in brace, not an escape sequence
+          isEscapeSequence = false;
+          break;
+        }
+      }
+      if (!isEnd)
+      {
+        // if we were interupted before ending, then this is not a valid
+        // escape sequence
+        isEscapeSequence = false;
+      }
+    }
+    if (isEscapeSequence)
+    {
+      Assert(!hexString.str().empty() && hexString.str().size() <= 5);
+      // Otherwise, we add the escaped character.
+      // This is guaranteed not to overflow due to the length of hstr.
+      uint32_t val;
+      hexString >> std::hex >> val;
+      if (val > num_codes())
+      {
+        // Failed due to being out of range. This can happen for strings of
+        // the form \ u { d_4 d_3 d_2 d_1 d_0 } where d_4 is a hexidecimal not
+        // in the range [0-2].
+        isEscapeSequence = false;
+      }
+      else
+      {
+        str.push_back(val);
        }
-    } else if ((unsigned)s[i] > 127 && useEscSequences) {
-      throw CVC4::Exception("Illegal String Literal: \"" + s +
-                            "\", must use escaped sequence");
-    } else {
-      str.push_back(convertCharToUnsignedInt(s[i]));
-      i++;
+    }
+    // if we did not successfully parse an escape sequence, we add back all
+    // characters that we cached
+    if (!isEscapeSequence)
+    {
+      str.insert(str.end(), nonEscCache.begin(), nonEscCache.end());
      }
    }
  #ifdef CVC4_ASSERTIONS
    for (unsigned u : str)
    {
-    Assert(convertUnsignedIntToCode(u) < num_codes());
+    Assert(u < num_codes());
    }
  #endif
    return str;
@@ -265,62 +275,23 @@ std::size_t String::roverlap(const String &y) const {
  }
  
  std::string String::toString(bool useEscSequences) const {
-  std::string str;
+  std::stringstream str;
    for (unsigned int i = 0; i < size(); ++i) {
-    unsigned char c = convertUnsignedIntToChar(d_str[i]);
-    if (!useEscSequences) {
-      str += c;
-    } else if (isprint(c)) {
-      if (c == '\\') {
-        str += "\\\\";
-      }
-      // else if(c == '\"') {
-      //  str += "\\\"";
-      //}
-      else {
-        str += c;
-      }
-    } else {
-      std::string s;
-      switch (c) {
-        case '\a':
-          s = "\\a";
-          break;
-        case '\b':
-          s = "\\b";
-          break;
-        case '\t':
-          s = "\\t";
-          break;
-        case '\r':
-          s = "\\r";
-          break;
-        case '\v':
-          s = "\\v";
-          break;
-        case '\f':
-          s = "\\f";
-          break;
-        case '\n':
-          s = "\\n";
-          break;
-        case '\e':
-          s = "\\e";
-          break;
-        default: {
-          std::stringstream ss;
-          ss << std::setfill('0') << std::setw(2) << std::hex << ((int)c);
-          std::string t = ss.str();
-          t = t.substr(t.size() - 2, 2);
-          s = "\\x" + t;
-          // std::string s2 = static_cast<std::ostringstream*>(
-          // &(std::ostringstream() << (int)c) )->str();
-        }
-      }
-      str += s;
+    // we always print forward slash as a code point so that it cannot
+    // be interpreted as specifying part of a code point, e.g. the string
+    // '\' + 'u' + '0' of length three.
+    if (isPrintable(d_str[i]) && d_str[i] != '\\' && !useEscSequences)
+    {
+      str << static_cast<char>(d_str[i]);
+    }
+    else
+    {
+      std::stringstream ss;
+      ss << std::hex << d_str[i];
+      str << "\\u{" << ss.str() << "}";
      }
    }
-  return str;
+  return str.str();
  }
  
  bool String::isLeq(const String &y) const
@@ -331,8 +302,8 @@ bool String::isLeq(const String &y) const
      {
        return false;
      }
-    unsigned ci = convertUnsignedIntToCode(d_str[i]);
-    unsigned cyi = convertUnsignedIntToCode(y.d_str[i]);
+    unsigned ci = d_str[i];
+    unsigned cyi = y.d_str[i];
      if (ci > cyi)
      {
        return false;
@@ -484,8 +455,21 @@ bool String::isNumber() const {
  
  bool String::isDigit(unsigned character)
  {
-  unsigned char c = convertUnsignedIntToChar(character);
-  return c >= '0' && c <= '9';
+  // '0' to '9'
+  return 48 <= character && character <= 57;
+}
+
+bool String::isHexDigit(unsigned character)
+{
+  // '0' to '9' or 'A' to 'F' or 'a' to 'f'
+  return isDigit(character) || (65 <= character && character <= 70)
+         || (97 <= character && character <= 102);
+}
+
+bool String::isPrintable(unsigned character)
+{
+  // Unicode 0x00020 (' ') to 0x0007E ('~')
+  return 32 <= character && character <= 126;
  }
  
  size_t String::maxSize() { return std::numeric_limits<uint32_t>::max(); }
@@ -497,17 +481,6 @@ Rational String::toNumber() const
    return Rational(toString());
  }
  
-unsigned char String::hexToDec(unsigned char c) {
-  if (c >= '0' && c <= '9') {
-    return c - '0';
-  } else if (c >= 'a' && c <= 'f') {
-    return c - 'a' + 10;
-  } else {
-    Assert(c >= 'A' && c <= 'F');
-    return c - 'A' + 10;
-  }
-}
-
  std::ostream &operator<<(std::ostream &os, const String &s) {
    return os << "\"" << s.toString(true) << "\"";
  }
diff --git a/src/util/regexp.h b/src/util/regexp.h

index 731736f72d1729648b534e00464da96c570955d3..56fb969a309f6890e86d222a519236e5e24ae1e1 100644 (file)
--- a/src/util/regexp.h
+++ b/src/util/regexp.h
@@ -36,61 +36,45 @@ namespace CVC4 {
   */
  class CVC4_PUBLIC String {
   public:
-  /**
-   * The start ASCII code. In our string representation below, we represent
-   * characters using a vector d_str of unsigned integers. We refer to this as
-   * the "internal representation" for the string.
-   *
-   * We make unsigned integer 0 correspond to the 65th character ("A") in the
-   * ASCII alphabet to make models intuitive. In particular, say if we have
-   * a set of string variables that are distinct but otherwise unconstrained,
-   * then the model may assign them "A", "B", "C", ...
-   */
-  static inline unsigned start_code() { return 65; }
    /**
     * This is the cardinality of the alphabet that is representable by this
     * class. Notice that this must be greater than or equal to the cardinality
     * of the alphabet that the string theory reasons about.
     *
     * This must be strictly less than std::numeric_limits<unsigned>::max().
+   *
+   * As per the SMT-LIB standard for strings, we support the first 3 planes of
+   * Unicode characters, where 196608 = 3*16^4.
     */
-  static inline unsigned num_codes() { return 256; }
-  /**
-   * Convert unsigned char to the unsigned used in the internal representation
-   * in d_str below.
-   */
-  static unsigned convertCharToUnsignedInt(unsigned char c);
-  /** Convert the internal unsigned to a unsigned char. */
-  static unsigned char convertUnsignedIntToChar(unsigned i);
-  /** Does the internal unsigned correspond to a printable character? */
-  static bool isPrintable(unsigned i);
-  /** get the internal unsigned for ASCII code c. */
-  static unsigned convertCodeToUnsignedInt(unsigned c);
-  /** get the ASCII code number that internal unsigned i corresponds to. */
-  static unsigned convertUnsignedIntToCode(unsigned i);
-
+  static inline unsigned num_codes() { return 196608; }
    /** constructors for String
-  *
-  * Internally, a CVC4::String is represented by a vector of unsigned
-  * integers (d_str), where the correspondence between C++ characters
-  * to and from unsigned integers is determined by
-  * by convertCharToUnsignedInt and convertUnsignedIntToChar.
-  *
-  * If useEscSequences is true, then the escape sequences in the input
-  * are converted to the corresponding character. This constructor may
-  * throw an exception if the input contains unrecognized escape sequences.
-  * Currently supported escape sequences are \n, \t, \v, \b, \r, \f, \a, \\,
-  * \x[N] where N is a hexidecimal, and octal escape sequences of the
-  * form \[c1]([c2]([c3])?)? where c1, c2, c3 are digits from 0 to 7.
-  *
-  * If useEscSequences is false, then the characters of the constructed
-  * CVC4::String correspond one-to-one with the input string.
-  */
+   *
+   * Internally, a CVC4::String is represented by a vector of unsigned
+   * integers (d_str) representing the code points of the characters.
+   *
+   * To build a string from a C++ string, we may process escape sequences
+   * according to the SMT-LIB standard. In particular, if useEscSequences is
+   * true, we convert unicode escape sequences:
+   *  \u d_3 d_2 d_1 d_0
+   *  \u{d_0}
+   *  \u{d_1 d_0}
+   *  \u{d_2 d_1 d_0}
+   *  \u{d_3 d_2 d_1 d_0}
+   *  \u{d_4 d_3 d_2 d_1 d_0}
+   * where d_0 ... d_4 are hexidecimal digits, to the appropriate character.
+   *
+   * If useEscSequences is false, then the characters of the constructed
+   * CVC4::String correspond one-to-one with the input string.
+   */
    String() = default;
    explicit String(const std::string& s, bool useEscSequences = false)
-      : d_str(toInternal(s, useEscSequences)) {}
+      : d_str(toInternal(s, useEscSequences))
+  {
+  }
    explicit String(const char* s, bool useEscSequences = false)
-      : d_str(toInternal(std::string(s), useEscSequences)) {}
+      : d_str(toInternal(std::string(s), useEscSequences))
+  {
+  }
    explicit String(const std::vector<unsigned>& s);
  
    String& operator=(const String& y) {
@@ -123,20 +107,16 @@ class CVC4_PUBLIC String {
    bool rstrncmp(const String& y, std::size_t n) const;
  
    /* toString
-  * Converts this string to a std::string.
-  *
-  * If useEscSequences is true, then unprintable characters
-  * are converted to escape sequences. The escape sequences
-  * \n, \t, \v, \b, \r, \f, \a, \\ are printed in this way.
-  * For all other unprintable characters, we print \x[N] where
-  * [N] is the 2 digit hexidecimal corresponding to value of
-  * the character.
-  *
-  * If useEscSequences is false, the returned std::string's characters
-  * map one-to-one with the characters in this string.
-  * Notice that for all std::string s, we have that
-  *    CVC4::String( s ).toString() = s.
-  */
+   * Converts this string to a std::string.
+   *
+   * The unprintable characters are converted to unicode escape sequences as
+   * described above.
+   *
+   * If useEscSequences is false, the string's printable characters are
+   * printed as characters. Notice that for all std::string s having only
+   * printable characters, we have that
+   *    CVC4::String( s ).toString() = s.
+   */
    std::string toString(bool useEscSequences = false) const;
    /** is this the empty string? */
    bool empty() const { return d_str.empty(); }
@@ -221,16 +201,32 @@ class CVC4_PUBLIC String {
    bool isNumber() const;
    /** Returns the corresponding rational for the text of this string. */
    Rational toNumber() const;
-  /** get the internal unsigned representation of this string */
+  /** Get the unsigned representation (code points) of this string */
    const std::vector<unsigned>& getVec() const { return d_str; }
-  /** get the internal unsigned value of the first character in this string */
+  /**
+   * Get the unsigned (code point) value of the first character in this string
+   */
    unsigned front() const;
-  /** get the internal unsigned value of the last character in this string */
+  /**
+   * Get the unsigned (code point) value of the last character in this string
+   */
    unsigned back() const;
    /** is the unsigned a digit?
-  * The input should be the same type as the element type of d_str
-  */
+   *
+   * This is true for code points between 48 ('0') and 57 ('9').
+   */
    static bool isDigit(unsigned character);
+  /** is the unsigned a hexidecimal digit?
+   *
+   * This is true for code points between 48 ('0') and 57 ('9'), code points
+   * between 65 ('A') and 70 ('F) and code points between 97 ('a') and 102 ('f).
+   */
+  static bool isHexDigit(unsigned character);
+  /** is the unsigned a printable code point?
+   *
+   * This is true for Unicode 32 (' ') to 126 ('~').
+   */
+  static bool isPrintable(unsigned character);
  
    /**
     * Returns the maximum length of string representable by this class.
@@ -238,11 +234,19 @@ class CVC4_PUBLIC String {
     */
    static size_t maxSize();
   private:
-  // guarded
-  static unsigned char hexToDec(unsigned char c);
-
+  /**
+   * Helper for toInternal: add character ch to vector vec, storing a string in
+   * internal format. This throws an error if ch is not a printable character,
+   * since non-printable characters must be escaped in SMT-LIB.
+   */
+  static void addCharToInternal(unsigned char ch, std::vector<unsigned>& vec);
+  /**
+   * Convert the string s to the internal format (vector of code points).
+   * The argument useEscSequences is whether to process unicode escape
+   * sequences.
+   */
    static std::vector<unsigned> toInternal(const std::string& s,
-                                          bool useEscSequences = true);
+                                          bool useEscSequences);
  
    /**
     * Returns a negative number if *this < y, 0 if *this and y are equal and a
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt

index 8fab16b4493d6133fae9f04a934f8666ee01d2f6..8382e40fce80ae9faa6ba060dbfdc1b57ce95e20 100644 (file)
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -922,6 +922,7 @@ set(regress_0_tests
    regress0/strings/escchar.smt2
    regress0/strings/escchar_25.smt2
    regress0/strings/from_code.smt2
+  regress0/strings/gen-esc-seq.smt2
    regress0/strings/hconst-092618.smt2
    regress0/strings/idof-rewrites.smt2
    regress0/strings/idof-sem.smt2
@@ -939,6 +940,8 @@ set(regress_0_tests
    regress0/strings/leadingzero001.smt2
    regress0/strings/loop001.smt2
    regress0/strings/model001.smt2
+  regress0/strings/model-code-point.smt2
+  regress0/strings/model-friendly.smt2
    regress0/strings/ncontrib-rewrites.smt2
    regress0/strings/norn-31.smt2
    regress0/strings/norn-simp-rew.smt2
@@ -967,6 +970,7 @@ set(regress_0_tests
    regress0/strings/tolower-rrs.smt2
    regress0/strings/tolower-simple.smt2
    regress0/strings/type001.smt2
+  regress0/strings/unicode-esc.smt2
    regress0/strings/unsound-0908.smt2
    regress0/strings/unsound-repl-rewrite.smt2
    regress0/sygus/General_plus10.sy
diff --git a/test/regress/regress0/strings/gen-esc-seq.smt2 b/test/regress/regress0/strings/gen-esc-seq.smt2

new file mode 100644 (file)

index 0000000..59f6604
--- /dev/null
+++ b/test/regress/regress0/strings/gen-esc-seq.smt2
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --produce-models --lang=smt2.6.1
+; EXPECT: sat
+; EXPECT: ((x "\u{5c}u1000"))
+(set-logic ALL)
+(set-info :status sat)
+(declare-const x String)
+(assert (= x (str.++ "\u" "1000")))
+(check-sat)
+(get-value (x))
diff --git a/test/regress/regress0/strings/model-code-point.smt2 b/test/regress/regress0/strings/model-code-point.smt2

new file mode 100644 (file)

index 0000000..1200ae7
--- /dev/null
+++ b/test/regress/regress0/strings/model-code-point.smt2
@@ -0,0 +1,13 @@
+; COMMAND-LINE: --lang=smt2.6.1 --produce-models
+; EXPECT: sat
+; EXPECT: ((x "\u{a}"))
+; EXPECT: ((y "\u{7f}"))
+(set-logic ALL)
+(set-info :status sat)
+(declare-fun x () String)
+(declare-fun y () String)
+(assert (= (str.to_code x) 10))
+(assert (= (str.to_code y) 127))
+(check-sat)
+(get-value (x))
+(get-value (y))
diff --git a/test/regress/regress0/strings/model-friendly.smt2 b/test/regress/regress0/strings/model-friendly.smt2

new file mode 100644 (file)

index 0000000..985ffaa
--- /dev/null
+++ b/test/regress/regress0/strings/model-friendly.smt2
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --lang=smt2.6.1 --produce-models
+; EXPECT: sat
+; EXPECT: ((x "AAAAA"))
+(set-logic ALL)
+(set-info :status sat)
+(declare-fun x () String)
+(assert (= (str.len x) 5))
+(check-sat)
+(get-value (x))
diff --git a/test/regress/regress0/strings/unicode-esc.smt2 b/test/regress/regress0/strings/unicode-esc.smt2

new file mode 100644 (file)

index 0000000..01f5f30
--- /dev/null
+++ b/test/regress/regress0/strings/unicode-esc.smt2
@@ -0,0 +1,30 @@
+; COMMAND-LINE: --strings-exp --lang=smt2.6.1
+; EXPECT: sat
+(set-logic ALL)
+
+(assert (= "\u{14}" "\u0014"))
+(assert (= "\u{00}" "\u{0}"))
+(assert (= "\u0000" "\u{0}"))
+(assert (= (str.len "\u1234") 1))
+(assert (= (str.len "\u{1}") 1))
+(assert (= (str.len "\u{99}") 1))
+(assert (= (str.len "\u{779}") 1))
+(assert (= (str.len "\u{0779}") 1))
+(assert (= (str.len "\u{01779}") 1))
+(assert (= (str.len "\u{001779}") 10))
+(assert (= (str.len "\u{0vv79}") 9))
+(assert (= (str.len "\u{11\u1234}") 7))
+(assert (= (str.len "\u12345") 2))
+(assert (= (str.len "\uu") 3))
+(assert (= (str.len "\u{123}\u{567}") 2))
+(assert (= (str.len "\u{0017") 7))
+(assert (= (str.len "\\u00178") 3))
+(assert (= (str.len "2\u{}") 5))
+(assert (= (str.len "\uaaaa") 1))
+(assert (= (str.len "\uAAAA") 1))
+(assert (= (str.len "\u{0AbC}") 1))
+(assert (= (str.len "\u{E}") 1))
+(assert (= (str.len "\u{44444}") 9))
+(assert (= (str.len "\u") 2))
+
+(check-sat)
diff --git a/test/unit/api/solver_black.h b/test/unit/api/solver_black.h

index 27f5aca12b6c2de4848c0f8e27655dc6504656a8..0eefde700b4dcc2ba5e9aad0f91637cf11b0e3af 100644 (file)
--- a/test/unit/api/solver_black.h
+++ b/test/unit/api/solver_black.h
@@ -556,9 +556,9 @@ void SolverBlack::testMkString()
    TS_ASSERT_THROWS_NOTHING(d_solver->mkString(""));
    TS_ASSERT_THROWS_NOTHING(d_solver->mkString("asdfasdf"));
    TS_ASSERT_EQUALS(d_solver->mkString("asdf\\nasdf").toString(),
-                   "\"asdf\\\\nasdf\"");
-  TS_ASSERT_EQUALS(d_solver->mkString("asdf\\nasdf", true).toString(),
-                   "\"asdf\\nasdf\"");
+                   "\"asdf\\u{5c}nasdf\"");
+  TS_ASSERT_EQUALS(d_solver->mkString("asdf\\u{005c}nasdf", true).toString(),
+                    "\"asdf\\u{5c}nasdf\"");
  }
  
  void SolverBlack::testMkTerm()
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Fri, 27 Mar 2020 14:01:38 +0000 (09:01 -0500)
committer	GitHub <noreply@github.com>
	Fri, 27 Mar 2020 14:01:38 +0000 (09:01 -0500)
src/parser/cvc/Cvc.g		patch \| blob \| history
src/parser/parser.cpp		patch \| blob \| history
src/parser/parser.h		patch \| blob \| history
src/parser/smt2/Smt2.g		patch \| blob \| history
src/preprocessing/passes/synth_rew_rules.cpp		patch \| blob \| history
src/printer/cvc/cvc_printer.cpp		patch \| blob \| history
src/printer/smt2/smt2_printer.cpp		patch \| blob \| history
src/theory/evaluator.cpp		patch \| blob \| history
src/theory/quantifiers/sygus_sampler.cpp		patch \| blob \| history
src/theory/strings/regexp_operation.cpp		patch \| blob \| history
src/theory/strings/sequences_rewriter.cpp		patch \| blob \| history
src/theory/strings/strings_rewriter.cpp		patch \| blob \| history
src/theory/strings/theory_strings.cpp		patch \| blob \| history
src/theory/strings/theory_strings_type_rules.h		patch \| blob \| history
src/theory/strings/type_enumerator.cpp		patch \| blob \| history
src/theory/strings/type_enumerator.h		patch \| blob \| history
src/util/regexp.cpp		patch \| blob \| history
src/util/regexp.h		patch \| blob \| history
test/regress/CMakeLists.txt		patch \| blob \| history
test/regress/regress0/strings/gen-esc-seq.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress0/strings/model-code-point.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress0/strings/model-friendly.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress0/strings/unicode-esc.smt2	[new file with mode: 0644]	patch \| blob
test/unit/api/solver_black.h		patch \| blob \| history