From 936e9c442443799c866a65c6ca3fbdcd3ac9aab8 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 1 Apr 2020 00:30:44 -0500 Subject: [PATCH] Support char smt-lib syntax (#4188) Towards support for the strings standard. Adds support to (_ char #x ... ) syntax for characters. --- src/api/cvc4cpp.cpp | 35 +++++++++++++++++-- src/api/cvc4cpp.h | 22 ++++++++++-- src/parser/smt2/Smt2.g | 14 +++++--- test/regress/CMakeLists.txt | 1 + .../strings/char-representations.smt2 | 22 ++++++++++++ .../regress/regress0/strings/unicode-esc.smt2 | 2 ++ test/unit/api/solver_black.h | 12 +++++++ 7 files changed, 98 insertions(+), 10 deletions(-) create mode 100644 test/regress/regress0/strings/char-representations.smt2 diff --git a/src/api/cvc4cpp.cpp b/src/api/cvc4cpp.cpp index fff4bef85..2e6e70d6b 100644 --- a/src/api/cvc4cpp.cpp +++ b/src/api/cvc4cpp.cpp @@ -2294,7 +2294,7 @@ Term Solver::mkValHelper(T t) const return res; } -Term Solver::mkRealFromStrHelper(std::string s) const +Term Solver::mkRealFromStrHelper(const std::string& s) const { /* CLN and GMP handle this case differently, CLN interprets it as 0, GMP * throws an std::invalid_argument exception. For consistency, we treat it @@ -2318,7 +2318,7 @@ Term Solver::mkBVFromIntHelper(uint32_t size, uint64_t val) const CVC4_API_SOLVER_TRY_CATCH_END; } -Term Solver::mkBVFromStrHelper(std::string s, uint32_t base) const +Term Solver::mkBVFromStrHelper(const std::string& s, uint32_t base) const { CVC4_API_ARG_CHECK_EXPECTED(!s.empty(), s) << "a non-empty string"; CVC4_API_ARG_CHECK_EXPECTED(base == 2 || base == 10 || base == 16, s) @@ -2328,7 +2328,7 @@ Term Solver::mkBVFromStrHelper(std::string s, uint32_t base) const } Term Solver::mkBVFromStrHelper(uint32_t size, - std::string s, + const std::string& s, uint32_t base) const { CVC4_API_ARG_CHECK_EXPECTED(!s.empty(), s) << "a non-empty string"; @@ -2353,6 +2353,20 @@ Term Solver::mkBVFromStrHelper(uint32_t size, return mkValHelper(CVC4::BitVector(size, val)); } +Term Solver::mkCharFromStrHelper(const std::string& s) const +{ + CVC4_API_CHECK(s.find_first_not_of("0123456789abcdefABCDEF", 0) + == std::string::npos + && s.size() <= 5 && s.size() > 0) + << "Unexpected string for hexidecimal character " << s; + uint32_t val = static_cast(std::stoul(s, 0, 16)); + CVC4_API_CHECK(val < String::num_codes()) + << "Not a valid code point for hexidecimal character " << s; + std::vector cpts; + cpts.push_back(val); + return mkValHelper(CVC4::String(cpts)); +} + Term Solver::mkTermFromKind(Kind kind) const { CVC4_API_SOLVER_TRY_CATCH_BEGIN; @@ -2951,6 +2965,21 @@ Term Solver::mkString(const std::vector& s) const CVC4_API_SOLVER_TRY_CATCH_END; } +Term Solver::mkChar(const std::string& s) const +{ + CVC4_API_SOLVER_TRY_CATCH_BEGIN; + return mkCharFromStrHelper(s); + CVC4_API_SOLVER_TRY_CATCH_END; +} + +Term Solver::mkChar(const char* s) const +{ + CVC4_API_SOLVER_TRY_CATCH_BEGIN; + CVC4_API_ARG_CHECK_NOT_NULLPTR(s); + return mkCharFromStrHelper(std::string(s)); + CVC4_API_SOLVER_TRY_CATCH_END; +} + Term Solver::mkUniverseSet(Sort sort) const { CVC4_API_SOLVER_TRY_CATCH_BEGIN; diff --git a/src/api/cvc4cpp.h b/src/api/cvc4cpp.h index 1a1cd3b61..edff95a2f 100644 --- a/src/api/cvc4cpp.h +++ b/src/api/cvc4cpp.h @@ -2302,6 +2302,20 @@ class CVC4_PUBLIC Solver */ Term mkString(const std::vector& s) const; + /** + * Create a character constant from a given string. + * @param s the string denoting the code point of the character (in base 16) + * @return the character constant + */ + Term mkChar(const std::string& s) const; + + /** + * Create a character constant from a given string. + * @param s the string denoting the code point of the character (in base 16) + * @return the character constant + */ + Term mkChar(const char* s) const; + /** * Create a universe set of the given sort. * @param sort the sort of the set elements @@ -2812,18 +2826,20 @@ class CVC4_PUBLIC Solver template Term mkValHelper(T t) const; /* Helper for mkReal functions that take a string as argument. */ - Term mkRealFromStrHelper(std::string s) const; + Term mkRealFromStrHelper(const std::string& s) const; /* Helper for mkBitVector functions that take a string as argument. */ - Term mkBVFromStrHelper(std::string s, uint32_t base) const; + Term mkBVFromStrHelper(const std::string& s, uint32_t base) const; /* Helper for mkBitVector functions that take a string and a size as * arguments. */ - Term mkBVFromStrHelper(uint32_t size, std::string s, uint32_t base) const; + Term mkBVFromStrHelper(uint32_t size, const std::string& s, uint32_t base) const; /* Helper for mkBitVector functions that take an integer as argument. */ Term mkBVFromIntHelper(uint32_t size, uint64_t val) const; /* Helper for setLogic. */ void setLogicHelper(const std::string& logic) const; /* Helper for mkTerm functions that create Term from a Kind */ Term mkTermFromKind(Kind kind) const; + /* Helper for mkChar functions that take a string as argument. */ + Term mkCharFromStrHelper(const std::string& s) const; /** * Helper function that ensures that a given term is of sort real (as opposed diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g index d0a73ce6a..1d0fb71cb 100644 --- a/src/parser/smt2/Smt2.g +++ b/src/parser/smt2/Smt2.g @@ -2083,6 +2083,11 @@ termAtomic[CVC4::api::Term& atomTerm] api::Term v2 = SOLVER->mkConst(api::Sort(type2), "_emp2"); atomTerm = SOLVER->mkTerm(api::SEP_EMP, v1, v2); } + | CHAR_TOK HEX_LITERAL + { + std::string hexStr = AntlrInput::tokenTextSubstr($HEX_LITERAL, 2); + atomTerm = SOLVER->mkChar(hexStr); + } | sym=SIMPLE_SYMBOL nonemptyNumeralList[numerals] { atomTerm = @@ -2094,10 +2099,10 @@ termAtomic[CVC4::api::Term& atomTerm] // Bit-vector constants | HEX_LITERAL - { - assert(AntlrInput::tokenText($HEX_LITERAL).find("#x") == 0); - std::string hexStr = AntlrInput::tokenTextSubstr($HEX_LITERAL, 2); - atomTerm = SOLVER->mkBitVector(hexStr, 16); + { + assert(AntlrInput::tokenText($HEX_LITERAL).find("#x") == 0); + std::string hexStr = AntlrInput::tokenTextSubstr($HEX_LITERAL, 2); + atomTerm = SOLVER->mkBitVector(hexStr, 16); } | BINARY_LITERAL { @@ -2686,6 +2691,7 @@ FORALL_TOK : 'forall'; CHOICE_TOK : { !PARSER_STATE->strictModeEnabled() }? 'choice'; EMP_TOK : { PARSER_STATE->isTheoryEnabled(theory::THEORY_SEP) }? 'emp'; +CHAR_TOK : { PARSER_STATE->isTheoryEnabled(theory::THEORY_STRINGS) }? 'char'; TUPLE_CONST_TOK: { PARSER_STATE->isTheoryEnabled(theory::THEORY_DATATYPES) }? 'mkTuple'; TUPLE_SEL_TOK: { PARSER_STATE->isTheoryEnabled(theory::THEORY_DATATYPES) }? 'tupSel'; diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index b9835d919..87bea4583 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -918,6 +918,7 @@ set(regress_0_tests regress0/strings/bug002.smt2 regress0/strings/bug612.smt2 regress0/strings/bug613.smt2 + regress0/strings/char-representations.smt2 regress0/strings/code-eval.smt2 regress0/strings/code-perf.smt2 regress0/strings/code-sat-neg-one.smt2 diff --git a/test/regress/regress0/strings/char-representations.smt2 b/test/regress/regress0/strings/char-representations.smt2 new file mode 100644 index 000000000..aaf119ab4 --- /dev/null +++ b/test/regress/regress0/strings/char-representations.smt2 @@ -0,0 +1,22 @@ +; COMMAND-LINE: --lang=smt2.6.1 +; EXPECT: sat +(set-logic SLIA) +(set-info :status sat) + +(declare-fun x () String) +(declare-fun y () String) +(declare-fun z () String) + +(assert (= x (_ char #x0D4))) +(assert (= x "\u00d4")) + + +(assert (= y (_ char #x1))) +(assert (= y "\u0001")) + +(assert (= z (_ char #xAF))) +(assert (= z (_ char #x0af))) +(assert (= z "\u{af}")) +(assert (= z "\u00af")) + +(check-sat) diff --git a/test/regress/regress0/strings/unicode-esc.smt2 b/test/regress/regress0/strings/unicode-esc.smt2 index 01f5f30ab..02b313d4a 100644 --- a/test/regress/regress0/strings/unicode-esc.smt2 +++ b/test/regress/regress0/strings/unicode-esc.smt2 @@ -26,5 +26,7 @@ (assert (= (str.len "\u{E}") 1)) (assert (= (str.len "\u{44444}") 9)) (assert (= (str.len "\u") 2)) +(assert (= (str.len "\u001") 5)) +(assert (= (str.len "\u0001") 1)) (check-sat) diff --git a/test/unit/api/solver_black.h b/test/unit/api/solver_black.h index 9fd1b7789..7c9af1714 100644 --- a/test/unit/api/solver_black.h +++ b/test/unit/api/solver_black.h @@ -69,6 +69,7 @@ class SolverBlack : public CxxTest::TestSuite void testMkRoundingMode(); void testMkSepNil(); void testMkString(); + void testMkChar(); void testMkTerm(); void testMkTermFromOp(); void testMkTrue(); @@ -560,6 +561,17 @@ void SolverBlack::testMkString() "\"asdf\\u{5c}nasdf\""); } +void SolverBlack::testMkChar() +{ + TS_ASSERT_THROWS_NOTHING(d_solver->mkChar(std::string("0123"))); + TS_ASSERT_THROWS_NOTHING(d_solver->mkChar("aA")); + TS_ASSERT_THROWS(d_solver->mkChar(nullptr), CVC4ApiException&); + TS_ASSERT_THROWS(d_solver->mkChar(""), CVC4ApiException&); + TS_ASSERT_THROWS(d_solver->mkChar("0g0"), CVC4ApiException&); + TS_ASSERT_THROWS(d_solver->mkChar("100000"), CVC4ApiException&); + TS_ASSERT_EQUALS(d_solver->mkChar("abc"), d_solver->mkChar("ABC")); +} + void SolverBlack::testMkTerm() { Sort bv32 = d_solver->mkBitVectorSort(32); -- 2.30.2