[Python API] Conversion to/from Unicode strings (#5120)

author Andres Noetzli <andres.noetzli@gmail.com>

Wed, 23 Sep 2020 00:41:09 +0000 (17:41 -0700)

committer GitHub <noreply@github.com>

Wed, 23 Sep 2020 00:41:09 +0000 (19:41 -0500)
author Andres Noetzli <andres.noetzli@gmail.com>
Wed, 23 Sep 2020 00:41:09 +0000 (17:41 -0700)
committer GitHub <noreply@github.com>
Wed, 23 Sep 2020 00:41:09 +0000 (19:41 -0500)
diff --git a/src/api/python/cvc4.pxi b/src/api/python/cvc4.pxi

index 9e4102b909802f553aeab9f62caff174ac9b065b..daff390b4022467dcf08a8c5874ac67f901fc621 100644 (file)
--- a/src/api/python/cvc4.pxi
+++ b/src/api/python/cvc4.pxi
@@ -700,7 +700,9 @@ cdef class Solver:
          cdef Term term = Term(self)
          cdef vector[unsigned] v
          if isinstance(str_or_vec, str):
-            term.cterm = self.csolver.mkString(<string &> str_or_vec.encode())
+            for u in str_or_vec:
+                v.push_back(<unsigned> ord(u))
+            term.cterm = self.csolver.mkString(<const vector[unsigned]&> v)
          elif isinstance(str_or_vec, list):
              for u in str_or_vec:
                  if not isinstance(u, int):
@@ -1517,6 +1519,7 @@ cdef class Term:
            BV      -- returns a Python int (treats BV as unsigned)
            Array   -- returns a Python dict mapping indices to values
                    -- the constant base is returned as the default value
+          String  -- returns a Python Unicode string
          '''
  
          if not self.isConst():
@@ -1590,6 +1593,26 @@ cdef class Term:
              res = defaultdict(lambda : base_value)
              for k, v in zip(keys, values):
                  res[k] = v
+        elif sort.isString():
+            # Strip leading and trailing double quotes and replace double
+            # double quotes by single quotes
+            string_repr = string_repr[1:-1].replace('""', '"')
+
+            # Convert escape sequences
+            res = ''
+            escape_prefix = '\\u{'
+            i = 0
+            while True:
+              prev_i = i
+              i = string_repr.find(escape_prefix, i)
+              if i == -1:
+                res += string_repr[prev_i:]
+                break
+
+              res += string_repr[prev_i:i]
+              val = string_repr[i + len(escape_prefix):string_repr.find('}', i)]
+              res += chr(int(val, 16))
+              i += len(escape_prefix) + len(val) + 1
          else:
              raise ValueError("Cannot convert term {}"
                               " of sort {} to Python object".format(string_repr,
diff --git a/src/util/string.cpp b/src/util/string.cpp

index af16e6a627e6888f94aaf9d9ad5e46f150a8815b..0d40ebb05d77764effdfb6d583555026fd421bbc 100644 (file)
--- a/src/util/string.cpp
+++ b/src/util/string.cpp
@@ -274,9 +274,9 @@ std::size_t String::roverlap(const String &y) const {
  std::string String::toString(bool useEscSequences) const {
    std::stringstream str;
    for (unsigned int i = 0; i < size(); ++i) {
-    // we always print forward slash as a code point so that it cannot
-    // be interpreted as specifying part of a code point, e.g. the string
-    // '\' + 'u' + '0' of length three.
+    // we always print backslash as a code point so that it cannot be
+    // interpreted as specifying part of a code point, e.g. the string '\' +
+    // 'u' + '0' of length three.
      if (isPrintable(d_str[i]) && d_str[i] != '\\' && !useEscSequences)
      {
        str << static_cast<char>(d_str[i]);
diff --git a/test/unit/api/python/test_to_python_obj.py b/test/unit/api/python/test_to_python_obj.py

index 69b7b9228e4c05076f63e0e17b64d45054969186..f8cd234ee52b48084d5f18f9d1bcd6359a526aa4 100644 (file)
--- a/test/unit/api/python/test_to_python_obj.py
+++ b/test/unit/api/python/test_to_python_obj.py
@@ -49,8 +49,6 @@ def testGetArray():
  
      array_dict = stores.toPythonObj()
  
-    print(array_dict)
-
      assert array_dict[1] == 2
      assert array_dict[2] == 3
      assert array_dict[4] == 5
@@ -64,3 +62,15 @@ def testGetSymbol():
  
      with pytest.raises(RuntimeError):
          x.toPythonObj()
+
+
+def testGetString():
+    solver = pycvc4.Solver()
+
+    s1 = '"test\n"😃\\u{a}'
+    t1 = solver.mkString(s1)
+    assert s1 == t1.toPythonObj()
+
+    s2 = '❤️CVC4❤️'
+    t2 = solver.mkString(s2)
+    assert s2 == t2.toPythonObj()
author	Andres Noetzli <andres.noetzli@gmail.com>
	Wed, 23 Sep 2020 00:41:09 +0000 (17:41 -0700)
committer	GitHub <noreply@github.com>
	Wed, 23 Sep 2020 00:41:09 +0000 (19:41 -0500)
src/api/python/cvc4.pxi		patch \| blob \| history
src/util/string.cpp		patch \| blob \| history
test/unit/api/python/test_to_python_obj.py		patch \| blob \| history