From f5f7ecf3ddd9ed23e5e44f2eefd41c1b11f2a70a Mon Sep 17 00:00:00 2001 From: Morgan Deters Date: Mon, 10 Feb 2014 21:05:16 -0500 Subject: [PATCH] New translation work, support Z3-str-style string constraints. --- src/main/Makefile.am | 8 +- src/main/translator.cpp | 175 ++++++++++++++++++++++++++++ src/options/base_options_handlers.h | 55 +++------ src/options/options_template.cpp | 9 +- src/printer/printer.cpp | 3 + src/printer/smt2/smt2_printer.cpp | 76 ++++++++++-- src/printer/smt2/smt2_printer.h | 8 ++ src/util/language.cpp | 54 ++++++++- src/util/language.h | 13 +++ 9 files changed, 348 insertions(+), 53 deletions(-) create mode 100644 src/main/translator.cpp diff --git a/src/main/Makefile.am b/src/main/Makefile.am index 7e87fa59a..042b7cf45 100644 --- a/src/main/Makefile.am +++ b/src/main/Makefile.am @@ -3,7 +3,7 @@ AM_CPPFLAGS = \ -I@builddir@/.. $(ANTLR_INCLUDES) -I@srcdir@/../include -I@srcdir@/.. AM_CXXFLAGS = -Wall -Wno-unknown-pragmas -bin_PROGRAMS = cvc4 +bin_PROGRAMS = cvc4 cvc4-translator noinst_LIBRARIES = libmain.a @@ -62,6 +62,12 @@ cvc4_LDADD += \ @builddir@/../lib/libreplacements.la endif +cvc4_translator_SOURCES = \ + translator.cpp +cvc4_translator_LDADD = \ + @builddir@/../parser/libcvc4parser.la \ + @builddir@/../libcvc4.la + BUILT_SOURCES = \ $(TOKENS_FILES) diff --git a/src/main/translator.cpp b/src/main/translator.cpp new file mode 100644 index 000000000..7cc90bbd1 --- /dev/null +++ b/src/main/translator.cpp @@ -0,0 +1,175 @@ +/********************* */ +/*! \file translator.cpp + ** \verbatim + ** Original author: Morgan Deters + ** Major contributors: none + ** Minor contributors (to current version): none + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2014 New York University and The University of Iowa + ** See the file COPYING in the top-level source directory for licensing + ** information.\endverbatim + ** + ** \brief Main driver for CVC4 translator executable + ** + ** Main driver for CVC4 translator executable. + **/ + +#include +#include +#include +#include +#include "util/language.h" +#include "expr/command.h" +#include "expr/expr.h" +#include "parser/parser_builder.h" +#include "parser/parser.h" + +using namespace std; +using namespace CVC4; +using namespace CVC4::language; +using namespace CVC4::parser; + +enum { + INPUT_LANG = 'L', + OUTPUT_LANG = 'O', + OUTPUT_FILE = 'o', + HELP = 'h' +};/* enum */ + +const struct option longopts[] = { + { "output-lang", required_argument, NULL, OUTPUT_LANG }, + { "output-language", required_argument, NULL, OUTPUT_LANG }, + { "lang", required_argument, NULL, INPUT_LANG }, + { "language", required_argument, NULL, INPUT_LANG }, + { "out", required_argument, NULL, OUTPUT_FILE }, + { "help", no_argument, NULL, HELP }, + { NULL, no_argument, NULL, 0 }, +};/* longopts */ + +static void showHelp() { + cerr << "cvc4-translator translation tool" << endl + << " --output-language | -O set output language (default smt2)" << endl + << " --input-language | -L set input language (default auto)" << endl + << " --out | -o set output file (- for stdout)" << endl + << " --help | -h this help" << endl + << "Options and input filenames can be intermixed, and order is important." << endl + << "For instance, \"-O smt2 x -O cvc4 y\" reads file x in smt2 format and" + << "file y in cvc4 format and writes all output to stdout." << endl + << "Some canonicalization may occur." << endl + << "Comments and formatting are not preserved." << endl; +} + +static void readFile(const char* filename, InputLanguage fromLang, OutputLanguage toLang, ostream* out) { + if(fromLang == input::LANG_AUTO) { + unsigned len = strlen(filename); + if(len >= 5 && !strcmp(".smt2", filename + len - 5)) { + fromLang = language::input::LANG_SMTLIB_V2; + } else if(len >= 4 && !strcmp(".smt", filename + len - 4)) { + fromLang = language::input::LANG_SMTLIB_V1; + } else if(len >= 5 && !strcmp(".smt1", filename + len - 5)) { + fromLang = language::input::LANG_SMTLIB_V1; + } else if((len >= 2 && !strcmp(".p", filename + len - 2)) || + (len >= 5 && !strcmp(".tptp", filename + len - 5))) { + fromLang = language::input::LANG_TPTP; + } else if(( len >= 4 && !strcmp(".cvc", filename + len - 4) ) || + ( len >= 5 && !strcmp(".cvc4", filename + len - 5) )) { + fromLang = language::input::LANG_CVC4; + } else { + throw Exception("cannot determine input language to use for `" + string(filename) + "'"); + } + } + + if(toLang == output::LANG_AUTO) { + toLang = toOutputLanguage(fromLang); + } + + *out << Expr::setlanguage(toLang); + + Options opts; + opts.set(options::inputLanguage, fromLang); + ExprManager exprMgr(opts); + ParserBuilder parserBuilder(&exprMgr, filename, opts); + if(!strcmp(filename, "-")) { + parserBuilder.withFilename(""); + parserBuilder.withLineBufferedStreamInput(cin); + } + Parser *parser = parserBuilder.build(); + while(Command* cmd = parser->nextCommand()) { + *out << cmd << endl; + if(dynamic_cast(cmd) != NULL) { + delete cmd; + break; + } + delete cmd; + } + *out << flush; + delete parser; +} + +/** + * Translate from an input language to an output language. + */ +int main(int argc, char* argv[]) { + ostream* out = &cout; + InputLanguage fromLang = input::LANG_AUTO; + OutputLanguage toLang = output::LANG_SMTLIB_V2; + size_t files = 0; + + try { + int c; + while((c = getopt_long(argc, argv, "-L:O:o:h", longopts, NULL)) != -1) { + switch(c) { + case 1: + ++files; + readFile(optarg, (!strcmp(optarg, "-") && fromLang == input::LANG_AUTO) ? input::LANG_CVC4 : fromLang, toLang, out); + break; + case INPUT_LANG: + if(!strcmp(optarg, "help")) { + Options::printLanguageHelp(cerr); + exit(1); + } + fromLang = toInputLanguage(optarg); + break; + case OUTPUT_LANG: + if(!strcmp(optarg, "help")) { + Options::printLanguageHelp(cerr); + exit(1); + } + toLang = toOutputLanguage(optarg); + break; + case OUTPUT_FILE: + out->flush(); + if(out != &cout) { + ((ofstream*)out)->close(); + delete out; + } + if(strcmp(optarg, "-")) { + out = new ofstream(optarg); + } else { + out = &cout; + } + break; + case 'h': + showHelp(); + exit(0); + case '?': + showHelp(); + exit(1); + default: + cerr << "internal error. translator failed (" + << char(c) << "," << c << ")." << endl; + exit(1); + } + } + + if(files == 0) { + readFile("-", fromLang == input::LANG_AUTO ? input::LANG_CVC4 : fromLang, toLang, out); + exit(0); + } + } catch(Exception& e) { + cerr << e << endl; + exit(1); + } + + return 0; +} diff --git a/src/options/base_options_handlers.h b/src/options/base_options_handlers.h index a161b6c07..2298f5880 100644 --- a/src/options/base_options_handlers.h +++ b/src/options/base_options_handlers.h @@ -25,6 +25,7 @@ #include #include "expr/command.h" +#include "util/language.h" namespace CVC4 { namespace options { @@ -69,59 +70,33 @@ inline void decreaseVerbosity(std::string option, SmtEngine* smt) { } inline OutputLanguage stringToOutputLanguage(std::string option, std::string optarg, SmtEngine* smt) throw(OptionException) { - if(optarg == "cvc4" || optarg == "pl" || - optarg == "presentation" || optarg == "native" || - optarg == "LANG_CVC4") { - return language::output::LANG_CVC4; - } else if(optarg == "smtlib1" || optarg == "smt1" || - optarg == "LANG_SMTLIB_V1") { - return language::output::LANG_SMTLIB_V1; - } else if(optarg == "smtlib" || optarg == "smt" || - optarg == "smtlib2" || optarg == "smt2" || - optarg == "LANG_SMTLIB_V2") { - return language::output::LANG_SMTLIB_V2; - } else if(optarg == "tptp" || optarg == "LANG_TPTP") { - return language::output::LANG_TPTP; - } else if(optarg == "ast" || optarg == "LANG_AST") { - return language::output::LANG_AST; - } else if(optarg == "auto" || optarg == "LANG_AUTO") { + if(optarg == "help") { + options::languageHelp.set(true); return language::output::LANG_AUTO; } - if(optarg != "help") { - throw OptionException(std::string("unknown language for ") + option + - ": `" + optarg + "'. Try --output-lang help."); + try { + return language::toOutputLanguage(optarg); + } catch(OptionException& oe) { + throw OptionException("Error in " + option + ": " + oe.getMessage() + "\nTry --output-language help"); } - options::languageHelp.set(true); - return language::output::LANG_AUTO; + Unreachable(); } inline InputLanguage stringToInputLanguage(std::string option, std::string optarg, SmtEngine* smt) throw(OptionException) { - if(optarg == "cvc4" || optarg == "pl" || - optarg == "presentation" || optarg == "native" || - optarg == "LANG_CVC4") { - return language::input::LANG_CVC4; - } else if(optarg == "smtlib1" || optarg == "smt1" || - optarg == "LANG_SMTLIB_V1") { - return language::input::LANG_SMTLIB_V1; - } else if(optarg == "smtlib" || optarg == "smt" || - optarg == "smtlib2" || optarg == "smt2" || - optarg == "LANG_SMTLIB_V2") { - return language::input::LANG_SMTLIB_V2; - } else if(optarg == "tptp" || optarg == "LANG_TPTP") { - return language::input::LANG_TPTP; - } else if(optarg == "auto" || optarg == "LANG_AUTO") { + if(optarg == "help") { + options::languageHelp.set(true); return language::input::LANG_AUTO; } - if(optarg != "help") { - throw OptionException(std::string("unknown language for ") + option + - ": `" + optarg + "'. Try --lang help."); + try { + return language::toInputLanguage(optarg); + } catch(OptionException& oe) { + throw OptionException("Error in " + option + ": " + oe.getMessage() + "\nTry --language help"); } - options::languageHelp.set(true); - return language::input::LANG_AUTO; + Unreachable(); } inline void addTraceTag(std::string option, std::string optarg, SmtEngine* smt) { diff --git a/src/options/options_template.cpp b/src/options/options_template.cpp index 9c2d5aaa4..02bfc6ca0 100644 --- a/src/options/options_template.cpp +++ b/src/options/options_template.cpp @@ -260,6 +260,7 @@ Languages currently supported as arguments to the --output-lang option:\n\ cvc4 | presentation | pl CVC4 presentation language\n\ smt1 | smtlib1 SMT-LIB format 1.2\n\ smt | smtlib | smt2 | smtlib2 SMT-LIB format 2.0\n\ + z3str SMT-LIB 2.0 with Z3-str string constraints\n\ tptp TPTP format\n\ ast internal format (simple syntax trees)\n\ "; @@ -311,7 +312,7 @@ static struct option cmdlineOptions[] = {${all_modules_long_options} { NULL, no_argument, NULL, '\0' } };/* cmdlineOptions */ -#line 315 "${template}" +#line 316 "${template}" static void preemptGetopt(int& argc, char**& argv, const char* opt) { const size_t maxoptlen = 128; @@ -504,7 +505,7 @@ std::vector Options::parseOptions(int argc, char* main_argv[]) thro switch(c) { ${all_modules_option_handlers} -#line 508 "${template}" +#line 509 "${template}" case ':': // This can be a long or short option, and the way to get at the @@ -572,7 +573,7 @@ std::vector Options::suggestCommandLineOptions(const std::string& o static const char* smtOptions[] = { ${all_modules_smt_options}, -#line 576 "${template}" +#line 577 "${template}" NULL };/* smtOptions[] */ @@ -594,7 +595,7 @@ SExpr Options::getOptions() const throw() { ${all_modules_get_options} -#line 598 "${template}" +#line 599 "${template}" return SExpr(opts); } diff --git a/src/printer/printer.cpp b/src/printer/printer.cpp index 39d065065..a68c3ca7f 100644 --- a/src/printer/printer.cpp +++ b/src/printer/printer.cpp @@ -49,6 +49,9 @@ Printer* Printer::makePrinter(OutputLanguage lang) throw() { case LANG_CVC4: return new printer::cvc::CvcPrinter(); + case LANG_Z3STR: + return new printer::smt2::Smt2Printer(printer::smt2::z3str_variant); + case LANG_AST: return new printer::ast::AstPrinter(); diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp index fcb352ea7..80dcc8248 100644 --- a/src/printer/smt2/smt2_printer.cpp +++ b/src/printer/smt2/smt2_printer.cpp @@ -86,6 +86,21 @@ static std::string maybeQuoteSymbol(const std::string& s) { return s; } +static bool stringifyRegexp(Node n, stringstream& ss) { + if(n.getKind() == kind::STRING_TO_REGEXP) { + ss << n[0].getConst().toString(); + } else if(n.getKind() == kind::REGEXP_CONCAT) { + for(unsigned i = 0; i < n.getNumChildren(); ++i) { + if(!stringifyRegexp(n[i], ss)) { + return false; + } + } + } else { + return false; + } + return true; +} + void Smt2Printer::toStream(std::ostream& out, TNode n, int toDepth, bool types) const throw() { // null @@ -279,9 +294,40 @@ void Smt2Printer::toStream(std::ostream& out, TNode n, case kind::ARRAY_TYPE: out << smtKindString(k) << " "; break; // string theory - case kind::STRING_CONCAT: out << "str.++ "; break; - case kind::STRING_IN_REGEXP: out << "str.in.re "; break; - case kind::STRING_LENGTH: out << "str.len "; break; + case kind::STRING_CONCAT: + if(d_variant == z3str_variant) { + out << "Concat "; + for(unsigned i = 0; i < n.getNumChildren(); ++i) { + toStream(out, n[i], -1, types); + if(i + 1 < n.getNumChildren()) { + out << ' '; + } + if(i + 2 < n.getNumChildren()) { + out << "(Concat "; + } + } + for(unsigned i = 0; i < n.getNumChildren() - 1; ++i) { + out << ")"; + } + return; + } + out << "str.++ "; + break; + case kind::STRING_IN_REGEXP: { + stringstream ss; + if(d_variant == z3str_variant && stringifyRegexp(n[1], ss)) { + out << "= "; + toStream(out, n[0], -1, types); + out << " "; + Node str = NodeManager::currentNM()->mkConst(String(ss.str())); + toStream(out, str, -1, types); + out << ")"; + return; + } + out << "str.in.re "; + break; + } + case kind::STRING_LENGTH: out << (d_variant == z3str_variant ? "Length " : "str.len "); break; case kind::STRING_SUBSTR_TOTAL: case kind::STRING_SUBSTR: out << "str.substr "; break; case kind::STRING_CHARAT: out << "str.at "; break; @@ -597,6 +643,8 @@ static void printBvParameterizedOp(std::ostream& out, TNode n) throw() { template static bool tryToStream(std::ostream& out, const Command* c) throw(); +template +static bool tryToStream(std::ostream& out, const Command* c, Variant v) throw(); void Smt2Printer::toStream(std::ostream& out, const Command* c, int toDepth, bool types, size_t dag) const throw() { @@ -624,7 +672,7 @@ void Smt2Printer::toStream(std::ostream& out, const Command* c, tryToStream(out, c) || tryToStream(out, c) || tryToStream(out, c) || - tryToStream(out, c) || + tryToStream(out, c, d_variant) || tryToStream(out, c) || tryToStream(out, c) || tryToStream(out, c) || @@ -773,7 +821,7 @@ static void toStream(std::ostream& out, const PopCommand* c) throw() { static void toStream(std::ostream& out, const CheckSatCommand* c) throw() { Expr e = c->getExpr(); - if(!e.isNull()) { + if(!e.isNull() && !(e.getKind() == kind::CONST_BOOLEAN && e.getConst())) { out << PushCommand() << endl << AssertCommand(e) << endl << CheckSatCommand() << endl @@ -912,8 +960,13 @@ static void toStream(std::ostream& out, const SetBenchmarkStatusCommand* c) thro out << "(set-info :status " << c->getStatus() << ")"; } -static void toStream(std::ostream& out, const SetBenchmarkLogicCommand* c) throw() { - out << "(set-logic " << c->getLogic() << ")"; +static void toStream(std::ostream& out, const SetBenchmarkLogicCommand* c, Variant v) throw() { + // Z3-str doesn't have string-specific logic strings(?), so comment it + if(v == z3str_variant) { + out << "; (set-logic " << c->getLogic() << ")"; + } else { + out << "(set-logic " << c->getLogic() << ")"; + } } static void toStream(std::ostream& out, const SetInfoCommand* c) throw() { @@ -990,6 +1043,15 @@ static bool tryToStream(std::ostream& out, const Command* c) throw() { return false; } +template +static bool tryToStream(std::ostream& out, const Command* c, Variant v) throw() { + if(typeid(*c) == typeid(T)) { + toStream(out, dynamic_cast(c), v); + return true; + } + return false; +} + static void toStream(std::ostream& out, const CommandSuccess* s) throw() { if(Command::printsuccess::getPrintSuccess(out)) { out << "success" << endl; diff --git a/src/printer/smt2/smt2_printer.h b/src/printer/smt2/smt2_printer.h index 871b3823a..c70bb78c3 100644 --- a/src/printer/smt2/smt2_printer.h +++ b/src/printer/smt2/smt2_printer.h @@ -27,11 +27,19 @@ namespace CVC4 { namespace printer { namespace smt2 { +enum Variant { + no_variant, + z3str_variant +};/* enum Variant */ + class Smt2Printer : public CVC4::Printer { + Variant d_variant; + void toStream(std::ostream& out, TNode n, int toDepth, bool types) const throw(); void toStream(std::ostream& out, const Model& m, const Command* c) const throw(); void toStream(std::ostream& out, const Model& m) const throw(); public: + Smt2Printer(Variant variant = no_variant) : d_variant(variant) { } using CVC4::Printer::toStream; void toStream(std::ostream& out, TNode n, int toDepth, bool types, size_t dag) const throw(); void toStream(std::ostream& out, const Command* c, int toDepth, bool types, size_t dag) const throw(); diff --git a/src/util/language.cpp b/src/util/language.cpp index 9cac3dd3f..c5c9828df 100644 --- a/src/util/language.cpp +++ b/src/util/language.cpp @@ -25,6 +25,7 @@ InputLanguage toInputLanguage(OutputLanguage language) { case output::LANG_SMTLIB_V2: case output::LANG_TPTP: case output::LANG_CVC4: + case output::LANG_Z3STR: // these entries directly correspond (by design) return InputLanguage(int(language)); @@ -41,8 +42,9 @@ OutputLanguage toOutputLanguage(InputLanguage language) { switch(language) { case input::LANG_SMTLIB_V1: case input::LANG_SMTLIB_V2: - case input::LANG_CVC4: case input::LANG_TPTP: + case input::LANG_CVC4: + case input::LANG_Z3STR: // these entries directly correspond (by design) return OutputLanguage(int(language)); @@ -61,5 +63,55 @@ OutputLanguage toOutputLanguage(InputLanguage language) { }/* switch(language) */ }/* toOutputLanguage() */ +OutputLanguage toOutputLanguage(std::string language) { + if(language == "cvc4" || language == "pl" || + language == "presentation" || language == "native" || + language == "LANG_CVC4") { + return output::LANG_CVC4; + } else if(language == "smtlib1" || language == "smt1" || + language == "LANG_SMTLIB_V1") { + return output::LANG_SMTLIB_V1; + } else if(language == "smtlib" || language == "smt" || + language == "smtlib2" || language == "smt2" || + language == "LANG_SMTLIB_V2") { + return output::LANG_SMTLIB_V2; + } else if(language == "tptp" || language == "LANG_TPTP") { + return output::LANG_TPTP; + } else if(language == "z3str" || language == "z3-str" || + language == "LANG_Z3STR") { + return output::LANG_Z3STR; + } else if(language == "ast" || language == "LANG_AST") { + return output::LANG_AST; + } else if(language == "auto" || language == "LANG_AUTO") { + return output::LANG_AUTO; + } + + throw OptionException(std::string("unknown output language `" + language + "'")); +}/* toOutputLanguage() */ + +InputLanguage toInputLanguage(std::string language) { + if(language == "cvc4" || language == "pl" || + language == "presentation" || language == "native" || + language == "LANG_CVC4") { + return input::LANG_CVC4; + } else if(language == "smtlib1" || language == "smt1" || + language == "LANG_SMTLIB_V1") { + return input::LANG_SMTLIB_V1; + } else if(language == "smtlib" || language == "smt" || + language == "smtlib2" || language == "smt2" || + language == "LANG_SMTLIB_V2") { + return input::LANG_SMTLIB_V2; + } else if(language == "tptp" || language == "LANG_TPTP") { + return input::LANG_TPTP; + } else if(language == "z3str" || language == "z3-str" || + language == "LANG_Z3STR") { + return input::LANG_Z3STR; + } else if(language == "auto" || language == "LANG_AUTO") { + return input::LANG_AUTO; + } + + throw OptionException(std::string("unknown input language " + language + "'")); +}/* toInputLanguage() */ + }/* CVC4::language namespace */ }/* CVC4 namespace */ diff --git a/src/util/language.h b/src/util/language.h index b83b3d093..c79c4d9aa 100644 --- a/src/util/language.h +++ b/src/util/language.h @@ -23,6 +23,7 @@ #include #include "util/exception.h" +#include "options/option_exception.h" namespace CVC4 { namespace language { @@ -50,6 +51,8 @@ enum CVC4_PUBLIC Language { LANG_TPTP, /** The CVC4 input language */ LANG_CVC4, + /** The Z3-str input language */ + LANG_Z3STR, // START INPUT-ONLY LANGUAGES AT ENUM VALUE 10 // THESE ARE IN PRINCIPLE NOT POSSIBLE OUTPUT LANGUAGES @@ -76,6 +79,9 @@ inline std::ostream& operator<<(std::ostream& out, Language lang) { case LANG_CVC4: out << "LANG_CVC4"; break; + case LANG_Z3STR: + out << "LANG_Z3STR"; + break; default: out << "undefined_input_language"; } @@ -107,6 +113,8 @@ enum CVC4_PUBLIC Language { LANG_TPTP = input::LANG_TPTP, /** The CVC4 output language */ LANG_CVC4 = input::LANG_CVC4, + /** The Z3-str output language */ + LANG_Z3STR = input::LANG_Z3STR, // START OUTPUT-ONLY LANGUAGES AT ENUM VALUE 10 // THESE ARE IN PRINCIPLE NOT POSSIBLE INPUT LANGUAGES @@ -133,6 +141,9 @@ inline std::ostream& operator<<(std::ostream& out, Language lang) { case LANG_CVC4: out << "LANG_CVC4"; break; + case LANG_Z3STR: + out << "LANG_Z3STR"; + break; case LANG_AST: out << "LANG_AST"; break; @@ -153,6 +164,8 @@ namespace language { InputLanguage toInputLanguage(OutputLanguage language) CVC4_PUBLIC; OutputLanguage toOutputLanguage(InputLanguage language) CVC4_PUBLIC; +InputLanguage toInputLanguage(std::string language) CVC4_PUBLIC; +OutputLanguage toOutputLanguage(std::string language) CVC4_PUBLIC; }/* CVC4::language namespace */ }/* CVC4 namespace */ -- 2.30.2