From 5cb2ece9eb26bdeb48e4c060b1f1be08ce2be9c2 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 23 Feb 2022 10:46:50 -0600 Subject: [PATCH] Properly sanatize user names in LFSC (#8080) Previously, we would get LFSC proof checking failures if we were using an identifier that was valid in SMT-LIB but not LFSC. Co-authored-by: Alex Ozdemir --- src/proof/lfsc/lfsc_node_converter.cpp | 60 +++++++++++++++---- src/proof/lfsc/lfsc_node_converter.h | 2 + test/unit/CMakeLists.txt | 1 + test/unit/proof/CMakeLists.txt | 1 + test/unit/proof/lfsc_node_converter_black.cpp | 60 +++++++++++++++++++ 5 files changed, 111 insertions(+), 13 deletions(-) create mode 100644 test/unit/proof/CMakeLists.txt create mode 100644 test/unit/proof/lfsc_node_converter_black.cpp diff --git a/src/proof/lfsc/lfsc_node_converter.cpp b/src/proof/lfsc/lfsc_node_converter.cpp index 1ff5d2a0f..3dc0aa3aa 100644 --- a/src/proof/lfsc/lfsc_node_converter.cpp +++ b/src/proof/lfsc/lfsc_node_converter.cpp @@ -15,6 +15,7 @@ #include "proof/lfsc/lfsc_node_converter.h" +#include #include #include "expr/array_store_all.h" @@ -142,10 +143,7 @@ Node LfscNodeConverter::postConvert(Node n) } else if (n.isVar()) { - std::stringstream ss; - ss << n; - Node nn = mkInternalSymbol(getNameForUserName(ss.str()), tn); - return nn; + return mkInternalSymbol(getNameForUserNameOf(n), tn); } else if (k == APPLY_UF) { @@ -604,9 +602,48 @@ TypeNode LfscNodeConverter::postConvertType(TypeNode tn) std::string LfscNodeConverter::getNameForUserName(const std::string& name) { - std::stringstream ss; - ss << "cvc." << name; - return ss.str(); + // For user name X, we do cvc.Y, where Y contains an escaped version of X. + // Specifically, since LFSC does not allow these characters in identifier + // bodies: "() \t\n\f;", each is replaced with an escape sequence "\xXX" + // where XX is the zero-padded hex representation of the code point. "\\" is + // also escaped. + // + // See also: https://github.com/cvc5/LFSC/blob/master/src/lexer.flex#L24 + // + // The "cvc." prefix ensures we do not clash with LFSC definitions. + // + // The escaping ensures that all names are valid LFSC identifiers. + std::string sanitized("cvc."); + size_t found = sanitized.size(); + sanitized += name; + // The following sanitizes symbols that are not allowed in LFSC identifiers + // here, e.g. + // |a b| + // is converted to: + // cvc.a\x20b + // where "20" is the hex unicode value of " ". + do + { + found = sanitized.find_first_of("() \t\n\f\\;", found); + if (found != std::string::npos) + { + // Emit hex sequence + std::stringstream seq; + seq << "\\x" << std::setbase(16) << std::setfill('0') << std::setw(2) + << static_cast(sanitized[found]); + sanitized.replace(found, 1, seq.str()); + // increment found over the escape + found += 3; + } + } while (found != std::string::npos); + return sanitized; +} + +std::string LfscNodeConverter::getNameForUserNameOf(Node v) +{ + std::string name; + v.getAttribute(expr::VarNameAttr(), name); + return getNameForUserName(name); } bool LfscNodeConverter::shouldTraverse(Node n) @@ -871,11 +908,9 @@ Node LfscNodeConverter::getOperatorOfTerm(Node n, bool macroApply) { Assert(indices.size() == 1); // must convert to user name - std::stringstream sss; - sss << indices[0]; TypeNode intType = nm->integerType(); indices[0] = - getSymbolInternal(k, intType, getNameForUserName(sss.str())); + getSymbolInternal(k, intType, getNameForUserNameOf(indices[0])); } } else if (op.getType().isFunction()) @@ -918,9 +953,8 @@ Node LfscNodeConverter::getOperatorOfTerm(Node n, bool macroApply) { unsigned index = DType::indexOf(op); const DType& dt = DType::datatypeOf(op); - std::stringstream ssc; - ssc << dt[index].getConstructor(); - opName << getNameForUserName(ssc.str()); + // get its variable name + opName << getNameForUserNameOf(dt[index].getConstructor()); } else if (k == APPLY_SELECTOR) { diff --git a/src/proof/lfsc/lfsc_node_converter.h b/src/proof/lfsc/lfsc_node_converter.h index 07eed996c..bbfbaba8e 100644 --- a/src/proof/lfsc/lfsc_node_converter.h +++ b/src/proof/lfsc/lfsc_node_converter.h @@ -103,6 +103,8 @@ class LfscNodeConverter : public NodeConverter /** get name for user name */ static std::string getNameForUserName(const std::string& name); + /** get name for the name of node v, where v should be a variable */ + static std::string getNameForUserNameOf(Node v); private: /** Should we traverse n? */ diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 0ba29544e..1fbaf5ba2 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -102,6 +102,7 @@ if(ENABLE_UNIT_TESTING) add_subdirectory(options) add_subdirectory(parser) add_subdirectory(printer) + add_subdirectory(proof) add_subdirectory(prop) add_subdirectory(theory) add_subdirectory(preprocessing) diff --git a/test/unit/proof/CMakeLists.txt b/test/unit/proof/CMakeLists.txt new file mode 100644 index 000000000..749948e39 --- /dev/null +++ b/test/unit/proof/CMakeLists.txt @@ -0,0 +1 @@ +cvc5_add_unit_test_black(lfsc_node_converter_black proof) diff --git a/test/unit/proof/lfsc_node_converter_black.cpp b/test/unit/proof/lfsc_node_converter_black.cpp new file mode 100644 index 000000000..31badc812 --- /dev/null +++ b/test/unit/proof/lfsc_node_converter_black.cpp @@ -0,0 +1,60 @@ +/****************************************************************************** + * Top contributors (to current version): + * Alex Ozdemir + * + * This file is part of the cvc5 project. + * + * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS + * in the top-level source directory and their institutional affiliations. + * All rights reserved. See the file COPYING in the top-level source + * directory for licensing information. + * **************************************************************************** + * + * Black box testing of LFSC proof conversion + */ + +#include + +#include "proof/lfsc/lfsc_node_converter.h" +#include "test.h" + +namespace cvc5 { +using namespace cvc5::proof; + +namespace test { + +class TestLfscNodeConverterBlack : public TestInternal +{ +}; + +TEST_F(TestLfscNodeConverterBlack, ident_sanitize) +{ + // LFSC does not allow these characters in identifier bodies: "() \t\n\f;" + // + // See also: https://github.com/cvc5/LFSC/blob/master/src/lexer.flex#L24 + // + // We use hex escapes that are valid in Python: + // https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals + ASSERT_EQ(LfscNodeConverter::getNameForUserName("there are spaces"), + R"x(cvc.there\x20are\x20spaces)x"); + ASSERT_EQ(LfscNodeConverter::getNameForUserName(" "), R"x(cvc.\x20\x20)x"); + ASSERT_EQ(LfscNodeConverter::getNameForUserName("there\\are\\slashes"), + R"x(cvc.there\x5care\x5cslashes)x"); + ASSERT_EQ(LfscNodeConverter::getNameForUserName("there\nare\nnewlines"), + R"x(cvc.there\x0aare\x0anewlines)x"); + ASSERT_EQ(LfscNodeConverter::getNameForUserName("there\rare\rCRs"), + "cvc.there\rare\rCRs"); + ASSERT_EQ(LfscNodeConverter::getNameForUserName("there\tare\ttabs"), + R"x(cvc.there\x09are\x09tabs)x"); + ASSERT_EQ(LfscNodeConverter::getNameForUserName("there(are(parens"), + R"x(cvc.there\x28are\x28parens)x"); + ASSERT_EQ(LfscNodeConverter::getNameForUserName("there)are)parens"), + R"x(cvc.there\x29are\x29parens)x"); + ASSERT_EQ(LfscNodeConverter::getNameForUserName("there;are;semis"), + R"x(cvc.there\x3bare\x3bsemis)x"); + ASSERT_EQ(LfscNodeConverter::getNameForUserName(""), R"x(cvc.)x"); +} + +} // namespace test + +} // namespace cvc5 -- 2.30.2