From: Andrew Reynolds Date: Thu, 1 Aug 2019 14:08:46 +0000 (-0500) Subject: Regular expression intersection modes (#3134) X-Git-Tag: cvc5-1.0.0~4060 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=79881c196e29ef341166e7a31c1183e8b537d069;p=cvc5.git Regular expression intersection modes (#3134) --- diff --git a/src/options/CMakeLists.txt b/src/options/CMakeLists.txt index fc725978e..70af2f056 100644 --- a/src/options/CMakeLists.txt +++ b/src/options/CMakeLists.txt @@ -37,8 +37,8 @@ libcvc4_add_sources( set_language.h smt_modes.cpp smt_modes.h - strings_process_loop_mode.cpp - strings_process_loop_mode.h + strings_modes.cpp + strings_modes.h sygus_out_mode.h theoryof_mode.cpp theoryof_mode.h diff --git a/src/options/options_handler.cpp b/src/options/options_handler.cpp index 43602c0a1..15436646f 100644 --- a/src/options/options_handler.cpp +++ b/src/options/options_handler.cpp @@ -1386,7 +1386,7 @@ theory::bv::BvSlicerMode OptionsHandler::stringToBvSlicerMode( } } -const std::string OptionsHandler::s_stringToStringsProcessLoopModeHelp = +const std::string OptionsHandler::s_stringsProcessLoopModeHelp = "Loop processing modes supported by the --strings-process-loop-mode " "option:\n" "\n" @@ -1430,7 +1430,7 @@ theory::strings::ProcessLoopMode OptionsHandler::stringToStringsProcessLoopMode( } else if (optarg == "help") { - puts(s_stringToStringsProcessLoopModeHelp.c_str()); + puts(s_stringsProcessLoopModeHelp.c_str()); exit(1); } else @@ -1441,6 +1441,57 @@ theory::strings::ProcessLoopMode OptionsHandler::stringToStringsProcessLoopMode( } } +const std::string OptionsHandler::s_regExpInterModeHelp = + "\ +Regular expression intersection modes supported by the --re-inter-mode option\ +\n\ +\n\ +all \n\ ++ Compute intersections for all regular expressions.\n\ +\n\ +constant (default)\n\ ++ Compute intersections only between regular expressions that do not contain\ +re.allchar or re.range\n\ +\n\ +one-constant\n\ ++ Compute intersections only between regular expressions such that at least one\ +side does not contain re.allchar or re.range\n\ +\n\ +none\n\ ++ Do not compute intersections for regular expressions\n\ +"; + +theory::strings::RegExpInterMode OptionsHandler::stringToRegExpInterMode( + std::string option, std::string optarg) +{ + if (optarg == "all") + { + return theory::strings::RegExpInterMode::RE_INTER_ALL; + } + else if (optarg == "constant") + { + return theory::strings::RegExpInterMode::RE_INTER_CONSTANT; + } + else if (optarg == "one-constant") + { + return theory::strings::RegExpInterMode::RE_INTER_ONE_CONSTANT; + } + else if (optarg == "none") + { + return theory::strings::RegExpInterMode::RE_INTER_NONE; + } + else if (optarg == "help") + { + puts(s_regExpInterModeHelp.c_str()); + exit(1); + } + else + { + throw OptionException(std::string("unknown option for --re-inter-mode: `") + + optarg + "'. Try --re-inter-mode=help."); + } +} + const std::string OptionsHandler::s_boolToBVModeHelp = "\ BoolToBV pass modes supported by the --bool-to-bv option:\n\ @@ -1768,7 +1819,7 @@ literals\n\ + block models based on the SAT skeleton\n\ \n\ values\n\ -+ block models based on the concrete model values for the free variables.\n\ ++ block models based on the concrete model values for the free variables.\n\ \n\ "; diff --git a/src/options/options_handler.h b/src/options/options_handler.h index 59503552c..9a596ddfc 100644 --- a/src/options/options_handler.h +++ b/src/options/options_handler.h @@ -37,7 +37,7 @@ #include "options/printer_modes.h" #include "options/quantifiers_modes.h" #include "options/smt_modes.h" -#include "options/strings_process_loop_mode.h" +#include "options/strings_modes.h" #include "options/sygus_out_mode.h" #include "options/theoryof_mode.h" #include "options/ufss_mode.h" @@ -153,6 +153,8 @@ public: theory::strings::ProcessLoopMode stringToStringsProcessLoopMode( std::string option, std::string optarg); + theory::strings::RegExpInterMode stringToRegExpInterMode(std::string option, + std::string optarg); // theory/uf/options_handlers.h theory::uf::UfssMode stringToUfssMode(std::string option, std::string optarg); @@ -242,7 +244,8 @@ public: static const std::string s_bvOptimizeSatProofHelp; static const std::string s_booleanTermConversionModeHelp; static const std::string s_bvSlicerModeHelp; - static const std::string s_stringToStringsProcessLoopModeHelp; + static const std::string s_stringsProcessLoopModeHelp; + static const std::string s_regExpInterModeHelp; static const std::string s_boolToBVModeHelp; static const std::string s_cegqiFairModeHelp; static const std::string s_decisionModeHelp; diff --git a/src/options/strings_modes.cpp b/src/options/strings_modes.cpp new file mode 100644 index 000000000..c56c82716 --- /dev/null +++ b/src/options/strings_modes.cpp @@ -0,0 +1,71 @@ +/********************* */ +/*! \file strings_modes.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Modes for the string solver. + **/ + +#include "options/strings_modes.h" + +#include +#include + +namespace CVC4 { + +std::ostream& operator<<(std::ostream& out, + theory::strings::ProcessLoopMode mode) +{ + switch (mode) + { + case theory::strings::ProcessLoopMode::FULL: + out << "ProcessLoopMode::FULL"; + break; + case theory::strings::ProcessLoopMode::SIMPLE: + out << "ProcessLoopMode::SIMPLE"; + break; + case theory::strings::ProcessLoopMode::SIMPLE_ABORT: + out << "ProcessLoopMode::SIMPLE_ABORT"; + break; + case theory::strings::ProcessLoopMode::NONE: + out << "ProcessLoopMode::NONE"; + break; + case theory::strings::ProcessLoopMode::ABORT: + out << "ProcessLoopMode::ABORT"; + break; + default: + out << "ProcessLoopMode:UNKNOWN![" << static_cast(mode) << "]"; + } + return out; +} + +std::ostream& operator<<(std::ostream& out, + theory::strings::RegExpInterMode mode) +{ + switch (mode) + { + case theory::strings::RegExpInterMode::RE_INTER_ALL: + out << "RegExpInterMode::RE_INTER_ALL"; + break; + case theory::strings::RegExpInterMode::RE_INTER_CONSTANT: + out << "RegExpInterMode::RE_INTER_CONSTANT"; + break; + case theory::strings::RegExpInterMode::RE_INTER_ONE_CONSTANT: + out << "RegExpInterMode::RE_INTER_ONE_CONSTANT"; + break; + case theory::strings::RegExpInterMode::RE_INTER_NONE: + out << "RegExpInterMode::RE_INTER_NONE"; + break; + default: + out << "RegExpInterMode:UNKNOWN![" << static_cast(mode) << "]"; + } + return out; +} + +} // namespace CVC4 diff --git a/src/options/strings_modes.h b/src/options/strings_modes.h new file mode 100644 index 000000000..7823ea8c7 --- /dev/null +++ b/src/options/strings_modes.h @@ -0,0 +1,78 @@ +/********************* */ +/*! \file strings_modes.h + ** \verbatim + ** Top contributors (to current version): + ** Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Modes for the string solver. + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__STRINGS_MODES_H +#define CVC4__THEORY__STRINGS__STRINGS_MODES_H + +#include + +namespace CVC4 { +namespace theory { +namespace strings { + +/** Enumeration of string processing loop modes */ +enum ProcessLoopMode +{ + /** Perform full loop processing. */ + FULL, + + /** Omit normal loop breaking. */ + SIMPLE, + + /** Abort if normal loop breaking is required. */ + SIMPLE_ABORT, + + /** Omit loop processing. */ + NONE, + + /** Abort if looping word equations are encountered. */ + ABORT +}; // enum ProcessLoopMode + +/** Enumeration of regular expression intersection modes */ +enum RegExpInterMode +{ + /** Compute intersections for all regular expressions. */ + RE_INTER_ALL, + + /** + * Compute intersections only for regular expressions without re.allchar + * and re.range. + */ + RE_INTER_CONSTANT, + + /** + * Compute intersections only between regular expressions where one side does + * not contain re.allchar and re.range. + */ + RE_INTER_ONE_CONSTANT, + + /** Do not compute intersections of regular expressions. */ + RE_INTER_NONE, +}; // enum RegExpInterMode + +} // namespace strings +} // namespace theory + +std::ostream& operator<<(std::ostream& out, + theory::strings::ProcessLoopMode mode); + +std::ostream& operator<<(std::ostream& out, + theory::strings::RegExpInterMode mode); + +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__STRINGS_MODES_H */ diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml index fd00b8917..2d8411256 100644 --- a/src/options/strings_options.toml +++ b/src/options/strings_options.toml @@ -135,8 +135,8 @@ header = "options/strings_options.h" type = "CVC4::theory::strings::ProcessLoopMode" default = "CVC4::theory::strings::ProcessLoopMode::FULL" handler = "stringToStringsProcessLoopMode" - includes = ["options/strings_process_loop_mode.h"] - help = "choose how to process looping string equations, see --strings-process-loop-mode=help for details" + includes = ["options/strings_modes.h"] + help = "determines how to process looping string equations" [[option]] name = "stringInferAsLemmas" @@ -216,3 +216,13 @@ header = "options/strings_options.h" default = "true" read_only = true help = "do flat form inferences" + +[[option]] + name = "stringRegExpInterMode" + category = "expert" + long = "re-inter-mode=MODE" + type = "CVC4::theory::strings::RegExpInterMode" + default = "CVC4::theory::strings::RE_INTER_CONSTANT" + handler = "stringToRegExpInterMode" + includes = ["options/strings_modes.h"] + help = "determines which regular expressions intersections to compute" diff --git a/src/options/strings_process_loop_mode.cpp b/src/options/strings_process_loop_mode.cpp deleted file mode 100644 index 59667ea75..000000000 --- a/src/options/strings_process_loop_mode.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/********************* */ -/*! \file strings_process_loop_mode.cpp - ** \verbatim - ** Top contributors (to current version): - ** Andres Noetzli - ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. - ** All rights reserved. See the file COPYING in the top-level source - ** directory for licensing information.\endverbatim - ** - ** \brief Modes for processing looping word equations in the string solver. - ** - ** Modes for processing looping word equations in the string solver. - **/ - -#include "options/strings_process_loop_mode.h" - -#include -#include - -namespace CVC4 { - -std::ostream& operator<<(std::ostream& out, - theory::strings::ProcessLoopMode mode) -{ - switch (mode) - { - case theory::strings::ProcessLoopMode::FULL: - out << "ProcessLoopMode::FULL"; - break; - case theory::strings::ProcessLoopMode::SIMPLE: - out << "ProcessLoopMode::SIMPLE"; - break; - case theory::strings::ProcessLoopMode::SIMPLE_ABORT: - out << "ProcessLoopMode::SIMPLE_ABORT"; - break; - case theory::strings::ProcessLoopMode::NONE: - out << "ProcessLoopMode::NONE"; - break; - case theory::strings::ProcessLoopMode::ABORT: - out << "ProcessLoopMode::ABORT"; - break; - default: - out << "ProcessLoopMode:UNKNOWN![" << static_cast(mode) << "]"; - } - return out; -} - -} // namespace CVC4 diff --git a/src/options/strings_process_loop_mode.h b/src/options/strings_process_loop_mode.h deleted file mode 100644 index fb2248eec..000000000 --- a/src/options/strings_process_loop_mode.h +++ /dev/null @@ -1,55 +0,0 @@ -/********************* */ -/*! \file strings_process_loop_mode.h - ** \verbatim - ** Top contributors (to current version): - ** Andres Noetzli - ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. - ** All rights reserved. See the file COPYING in the top-level source - ** directory for licensing information.\endverbatim - ** - ** \brief Modes for processing looping word equations in the string solver. - ** - ** Modes for processing looping word equations in the string solver. - **/ - -#include "cvc4_private.h" - -#ifndef CVC4__THEORY__STRINGS__PROCESS_LOOP_MODE_H -#define CVC4__THEORY__STRINGS__PROCESS_LOOP_MODE_H - -#include - -namespace CVC4 { -namespace theory { -namespace strings { - -/** Enumeration of bit-blasting modes */ -enum class ProcessLoopMode -{ - /** Perform full loop processing. */ - FULL, - - /** Omit normal loop breaking. */ - SIMPLE, - - /** Abort if normal loop breaking is required. */ - SIMPLE_ABORT, - - /** Omit loop processing. */ - NONE, - - /** Abort if looping word equations are encountered. */ - ABORT -}; // enum ProcessLoopMode - -} // namespace strings -} // namespace theory - -std::ostream& operator<<(std::ostream& out, - theory::strings::ProcessLoopMode mode); - -} // namespace CVC4 - -#endif /* CVC4__THEORY__BV__BITBLAST_MODE_H */ diff --git a/src/smt/smt_engine.cpp b/src/smt/smt_engine.cpp index f75726be2..709df6c9f 100644 --- a/src/smt/smt_engine.cpp +++ b/src/smt/smt_engine.cpp @@ -68,8 +68,8 @@ #include "options/sep_options.h" #include "options/set_language.h" #include "options/smt_options.h" +#include "options/strings_modes.h" #include "options/strings_options.h" -#include "options/strings_process_loop_mode.h" #include "options/theory_options.h" #include "options/uf_options.h" #include "preprocessing/preprocessing_pass.h" diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index f11254794..7286e2fb4 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -47,24 +47,61 @@ RegExpOpr::RegExpOpr() RegExpOpr::~RegExpOpr() {} bool RegExpOpr::checkConstRegExp( Node r ) { - Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with /" << mkString( r ) << "/" << std::endl; - bool ret = true; - if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) { - ret = d_cstre_cache[r]; - } else { - if(r.getKind() == kind::STRING_TO_REGEXP) { - Node tmp = Rewriter::rewrite( r[0] ); - ret = tmp.isConst(); - } else { - for(unsigned i=0; i::iterator it; + std::vector visit; + TNode cur; + visit.push_back(r); + do + { + cur = visit.back(); + visit.pop_back(); + it = d_constCache.find(cur); + + if (it == d_constCache.end()) + { + Kind ck = cur.getKind(); + if (ck == STRING_TO_REGEXP) + { + Node tmp = Rewriter::rewrite(cur[0]); + d_constCache[cur] = + tmp.isConst() ? RE_C_CONRETE_CONSTANT : RE_C_VARIABLE; + } + else if (ck == REGEXP_SIGMA || ck == REGEXP_RANGE) + { + d_constCache[cur] = RE_C_CONSTANT; + } + else + { + d_constCache[cur] = RE_C_UNKNOWN; + visit.push_back(cur); + visit.insert(visit.end(), cur.begin(), cur.end()); + } + } + else if (it->second == RE_C_UNKNOWN) + { + RegExpConstType ret = RE_C_CONRETE_CONSTANT; + for (const Node& cn : cur) + { + it = d_constCache.find(cn); + Assert(it != d_constCache.end()); + if (it->second > ret) + { + ret = it->second; } } + d_constCache[cur] = ret; } - d_cstre_cache[r] = ret; - } - return ret; + } while (!visit.empty()); + Assert(d_constCache.find(r) != d_constCache.end()); + return d_constCache[r]; } // 0-unknown, 1-yes, 2-no diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index 8f9541e91..cb35b37c6 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -33,6 +33,25 @@ namespace CVC4 { namespace theory { namespace strings { +/** + * Information on whether regular expressions contain constants or re.allchar. + * + * The order of this enumeration matters: the larger the value, the more + * possible regular expressions could fit the description. + */ +enum RegExpConstType +{ + // the regular expression doesn't contain variables or re.allchar or re.range + RE_C_CONRETE_CONSTANT, + // the regular expression doesn't contain variables, but may contain + // re.allchar or re.range + RE_C_CONSTANT, + // the regular expression may contain variables + RE_C_VARIABLE, + // the status of the regular expression is unknown (used internally) + RE_C_UNKNOWN, +}; + class RegExpOpr { typedef std::pair< Node, CVC4::String > PairNodeStr; typedef std::set< Node > SetNodes; @@ -58,7 +77,8 @@ class RegExpOpr { std::map d_dv_cache; std::map > d_deriv_cache; std::map > d_compl_cache; - std::map d_cstre_cache; + /** cache mapping regular expressions to whether they contain constants */ + std::unordered_map d_constCache; std::map, std::set > > d_cset_cache; std::map, std::set > > d_fset_cache; std::map d_inter_cache; @@ -96,6 +116,8 @@ class RegExpOpr { * are such that t is a constant (or rewrites to one). */ bool checkConstRegExp( Node r ); + /** get the constant type for regular expression r */ + RegExpConstType getRegExpConstType(Node r); void simplify(Node t, std::vector< Node > &new_nodes, bool polarity); int delta( Node r, Node &exp ); int derivativeS( Node r, CVC4::String c, Node &retNode ); diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp index c50889e78..463891839 100644 --- a/src/theory/strings/regexp_solver.cpp +++ b/src/theory/strings/regexp_solver.cpp @@ -248,13 +248,19 @@ void RegExpSolver::check(const std::map >& mems) bool RegExpSolver::checkEqcIntersect(const std::vector& mems) { + // do not compute intersections if the re intersection mode is none + if (options::stringRegExpInterMode() == RE_INTER_NONE) + { + return true; + } if (mems.empty()) { // nothing to do return true; } - // the initial regular expression membership + // the initial regular expression membership and its constant type Node mi; + RegExpConstType rcti = RE_C_UNKNOWN; NodeManager* nm = NodeManager::currentNM(); for (const Node& m : mems) { @@ -264,15 +270,29 @@ bool RegExpSolver::checkEqcIntersect(const std::vector& mems) Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP); continue; } - if (!d_regexp_opr.checkConstRegExp(m)) + RegExpConstType rct = d_regexp_opr.getRegExpConstType(m); + if (rct == RE_C_VARIABLE + || (options::stringRegExpInterMode() == RE_INTER_CONSTANT + && rct != RE_C_CONRETE_CONSTANT)) { - // cannot do intersection on RE with variables + // cannot do intersection on RE with variables, or with re.allchar based + // on option. continue; } + if (options::stringRegExpInterMode() == RE_INTER_ONE_CONSTANT) + { + if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT) + { + // if both have re.allchar, do not do intersection if the + // RE_INTER_ONE_CONSTANT option is set. + continue; + } + } if (mi.isNull()) { // first regular expression seen mi = m; + rcti = rct; continue; } bool spflag = false; diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index df278ba5a..1b1f87bfc 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1588,6 +1588,7 @@ set(regress_1_tests regress1/strings/nterm-re-inter-sigma.smt2 regress1/strings/pierre150331.smt2 regress1/strings/policy_variable.smt2 + regress1/strings/re-all-char-hard.smt2 regress1/strings/re-agg-total1.smt2 regress1/strings/re-agg-total2.smt2 regress1/strings/re-elim-exact.smt2 diff --git a/test/regress/regress1/strings/re-all-char-hard.smt2 b/test/regress/regress1/strings/re-all-char-hard.smt2 new file mode 100644 index 000000000..2a643ac88 --- /dev/null +++ b/test/regress/regress1/strings/re-all-char-hard.smt2 @@ -0,0 +1,11 @@ +; COMMAND-LINE: --no-re-elim +; EXPECT: sat +(set-info :smt-lib-version 2.6) +(set-logic ALL) +(set-info :status sat) +(declare-fun x () String) + +; we should not intersect these two regular expressions +(assert (str.in.re x (re.++ (str.to.re "abc:def:ghi:") (re.* re.allchar ) (str.to.re ":") (re.* re.allchar ) (str.to.re ":cluster/sflower-bottle-guide-") (re.* re.allchar )))) +(assert (str.in.re x (re.++ (re.* re.allchar ) (str.to.re ":") (re.* re.allchar ) (str.to.re ":") (re.* re.allchar ) (str.to.re ":") (re.* re.allchar ) (str.to.re ":") (re.* re.allchar ) (str.to.re ":") (re.* re.allchar )))) +(check-sat)