From 115511f6c5732cac1c3718d365d1cf5596541c95 Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Tue, 22 Sep 2020 17:41:09 -0700 Subject: [PATCH] [Python API] Conversion to/from Unicode strings (#5120) Fixes #5024. This commit adds a conversion from constant string terms to native Python Unicode strings in Term.toPythonObj() and improves Solver.mkString() to accept strings containing characters outside of the printable range. --- src/api/python/cvc4.pxi | 25 +++++++++++++++++++++- src/util/string.cpp | 6 +++--- test/unit/api/python/test_to_python_obj.py | 14 ++++++++++-- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/src/api/python/cvc4.pxi b/src/api/python/cvc4.pxi index 9e4102b90..daff390b4 100644 --- a/src/api/python/cvc4.pxi +++ b/src/api/python/cvc4.pxi @@ -700,7 +700,9 @@ cdef class Solver: cdef Term term = Term(self) cdef vector[unsigned] v if isinstance(str_or_vec, str): - term.cterm = self.csolver.mkString( str_or_vec.encode()) + for u in str_or_vec: + v.push_back( ord(u)) + term.cterm = self.csolver.mkString( v) elif isinstance(str_or_vec, list): for u in str_or_vec: if not isinstance(u, int): @@ -1517,6 +1519,7 @@ cdef class Term: BV -- returns a Python int (treats BV as unsigned) Array -- returns a Python dict mapping indices to values -- the constant base is returned as the default value + String -- returns a Python Unicode string ''' if not self.isConst(): @@ -1590,6 +1593,26 @@ cdef class Term: res = defaultdict(lambda : base_value) for k, v in zip(keys, values): res[k] = v + elif sort.isString(): + # Strip leading and trailing double quotes and replace double + # double quotes by single quotes + string_repr = string_repr[1:-1].replace('""', '"') + + # Convert escape sequences + res = '' + escape_prefix = '\\u{' + i = 0 + while True: + prev_i = i + i = string_repr.find(escape_prefix, i) + if i == -1: + res += string_repr[prev_i:] + break + + res += string_repr[prev_i:i] + val = string_repr[i + len(escape_prefix):string_repr.find('}', i)] + res += chr(int(val, 16)) + i += len(escape_prefix) + len(val) + 1 else: raise ValueError("Cannot convert term {}" " of sort {} to Python object".format(string_repr, diff --git a/src/util/string.cpp b/src/util/string.cpp index af16e6a62..0d40ebb05 100644 --- a/src/util/string.cpp +++ b/src/util/string.cpp @@ -274,9 +274,9 @@ std::size_t String::roverlap(const String &y) const { std::string String::toString(bool useEscSequences) const { std::stringstream str; for (unsigned int i = 0; i < size(); ++i) { - // we always print forward slash as a code point so that it cannot - // be interpreted as specifying part of a code point, e.g. the string - // '\' + 'u' + '0' of length three. + // we always print backslash as a code point so that it cannot be + // interpreted as specifying part of a code point, e.g. the string '\' + + // 'u' + '0' of length three. if (isPrintable(d_str[i]) && d_str[i] != '\\' && !useEscSequences) { str << static_cast(d_str[i]); diff --git a/test/unit/api/python/test_to_python_obj.py b/test/unit/api/python/test_to_python_obj.py index 69b7b9228..f8cd234ee 100644 --- a/test/unit/api/python/test_to_python_obj.py +++ b/test/unit/api/python/test_to_python_obj.py @@ -49,8 +49,6 @@ def testGetArray(): array_dict = stores.toPythonObj() - print(array_dict) - assert array_dict[1] == 2 assert array_dict[2] == 3 assert array_dict[4] == 5 @@ -64,3 +62,15 @@ def testGetSymbol(): with pytest.raises(RuntimeError): x.toPythonObj() + + +def testGetString(): + solver = pycvc4.Solver() + + s1 = '"test\n"😃\\u{a}' + t1 = solver.mkString(s1) + assert s1 == t1.toPythonObj() + + s2 = '❤️CVC4❤️' + t2 = solver.mkString(s2) + assert s2 == t2.toPythonObj() -- 2.30.2