From 79881c196e29ef341166e7a31c1183e8b537d069 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Thu, 1 Aug 2019 09:08:46 -0500 Subject: [PATCH] Regular expression intersection modes (#3134) --- src/options/CMakeLists.txt | 4 +- src/options/options_handler.cpp | 57 +++++++++++++++- src/options/options_handler.h | 7 +- ...rocess_loop_mode.cpp => strings_modes.cpp} | 31 +++++++-- ...gs_process_loop_mode.h => strings_modes.h} | 43 +++++++++--- src/options/strings_options.toml | 14 +++- src/smt/smt_engine.cpp | 2 +- src/theory/strings/regexp_operation.cpp | 67 ++++++++++++++----- src/theory/strings/regexp_operation.h | 24 ++++++- src/theory/strings/regexp_solver.cpp | 26 ++++++- test/regress/CMakeLists.txt | 1 + .../regress1/strings/re-all-char-hard.smt2 | 11 +++ 12 files changed, 243 insertions(+), 44 deletions(-) rename src/options/{strings_process_loop_mode.cpp => strings_modes.cpp} (61%) rename src/options/{strings_process_loop_mode.h => strings_modes.h} (51%) create mode 100644 test/regress/regress1/strings/re-all-char-hard.smt2 diff --git a/src/options/CMakeLists.txt b/src/options/CMakeLists.txt index fc725978e..70af2f056 100644 --- a/src/options/CMakeLists.txt +++ b/src/options/CMakeLists.txt @@ -37,8 +37,8 @@ libcvc4_add_sources( set_language.h smt_modes.cpp smt_modes.h - strings_process_loop_mode.cpp - strings_process_loop_mode.h + strings_modes.cpp + strings_modes.h sygus_out_mode.h theoryof_mode.cpp theoryof_mode.h diff --git a/src/options/options_handler.cpp b/src/options/options_handler.cpp index 43602c0a1..15436646f 100644 --- a/src/options/options_handler.cpp +++ b/src/options/options_handler.cpp @@ -1386,7 +1386,7 @@ theory::bv::BvSlicerMode OptionsHandler::stringToBvSlicerMode( } } -const std::string OptionsHandler::s_stringToStringsProcessLoopModeHelp = +const std::string OptionsHandler::s_stringsProcessLoopModeHelp = "Loop processing modes supported by the --strings-process-loop-mode " "option:\n" "\n" @@ -1430,7 +1430,7 @@ theory::strings::ProcessLoopMode OptionsHandler::stringToStringsProcessLoopMode( } else if (optarg == "help") { - puts(s_stringToStringsProcessLoopModeHelp.c_str()); + puts(s_stringsProcessLoopModeHelp.c_str()); exit(1); } else @@ -1441,6 +1441,57 @@ theory::strings::ProcessLoopMode OptionsHandler::stringToStringsProcessLoopMode( } } +const std::string OptionsHandler::s_regExpInterModeHelp = + "\ +Regular expression intersection modes supported by the --re-inter-mode option\ +\n\ +\n\ +all \n\ ++ Compute intersections for all regular expressions.\n\ +\n\ +constant (default)\n\ ++ Compute intersections only between regular expressions that do not contain\ +re.allchar or re.range\n\ +\n\ +one-constant\n\ ++ Compute intersections only between regular expressions such that at least one\ +side does not contain re.allchar or re.range\n\ +\n\ +none\n\ ++ Do not compute intersections for regular expressions\n\ +"; + +theory::strings::RegExpInterMode OptionsHandler::stringToRegExpInterMode( + std::string option, std::string optarg) +{ + if (optarg == "all") + { + return theory::strings::RegExpInterMode::RE_INTER_ALL; + } + else if (optarg == "constant") + { + return theory::strings::RegExpInterMode::RE_INTER_CONSTANT; + } + else if (optarg == "one-constant") + { + return theory::strings::RegExpInterMode::RE_INTER_ONE_CONSTANT; + } + else if (optarg == "none") + { + return theory::strings::RegExpInterMode::RE_INTER_NONE; + } + else if (optarg == "help") + { + puts(s_regExpInterModeHelp.c_str()); + exit(1); + } + else + { + throw OptionException(std::string("unknown option for --re-inter-mode: `") + + optarg + "'. Try --re-inter-mode=help."); + } +} + const std::string OptionsHandler::s_boolToBVModeHelp = "\ BoolToBV pass modes supported by the --bool-to-bv option:\n\ @@ -1768,7 +1819,7 @@ literals\n\ + block models based on the SAT skeleton\n\ \n\ values\n\ -+ block models based on the concrete model values for the free variables.\n\ ++ block models based on the concrete model values for the free variables.\n\ \n\ "; diff --git a/src/options/options_handler.h b/src/options/options_handler.h index 59503552c..9a596ddfc 100644 --- a/src/options/options_handler.h +++ b/src/options/options_handler.h @@ -37,7 +37,7 @@ #include "options/printer_modes.h" #include "options/quantifiers_modes.h" #include "options/smt_modes.h" -#include "options/strings_process_loop_mode.h" +#include "options/strings_modes.h" #include "options/sygus_out_mode.h" #include "options/theoryof_mode.h" #include "options/ufss_mode.h" @@ -153,6 +153,8 @@ public: theory::strings::ProcessLoopMode stringToStringsProcessLoopMode( std::string option, std::string optarg); + theory::strings::RegExpInterMode stringToRegExpInterMode(std::string option, + std::string optarg); // theory/uf/options_handlers.h theory::uf::UfssMode stringToUfssMode(std::string option, std::string optarg); @@ -242,7 +244,8 @@ public: static const std::string s_bvOptimizeSatProofHelp; static const std::string s_booleanTermConversionModeHelp; static const std::string s_bvSlicerModeHelp; - static const std::string s_stringToStringsProcessLoopModeHelp; + static const std::string s_stringsProcessLoopModeHelp; + static const std::string s_regExpInterModeHelp; static const std::string s_boolToBVModeHelp; static const std::string s_cegqiFairModeHelp; static const std::string s_decisionModeHelp; diff --git a/src/options/strings_process_loop_mode.cpp b/src/options/strings_modes.cpp similarity index 61% rename from src/options/strings_process_loop_mode.cpp rename to src/options/strings_modes.cpp index 59667ea75..c56c82716 100644 --- a/src/options/strings_process_loop_mode.cpp +++ b/src/options/strings_modes.cpp @@ -1,5 +1,5 @@ /********************* */ -/*! \file strings_process_loop_mode.cpp +/*! \file strings_modes.cpp ** \verbatim ** Top contributors (to current version): ** Andres Noetzli @@ -9,12 +9,10 @@ ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** - ** \brief Modes for processing looping word equations in the string solver. - ** - ** Modes for processing looping word equations in the string solver. + ** \brief Modes for the string solver. **/ -#include "options/strings_process_loop_mode.h" +#include "options/strings_modes.h" #include #include @@ -47,4 +45,27 @@ std::ostream& operator<<(std::ostream& out, return out; } +std::ostream& operator<<(std::ostream& out, + theory::strings::RegExpInterMode mode) +{ + switch (mode) + { + case theory::strings::RegExpInterMode::RE_INTER_ALL: + out << "RegExpInterMode::RE_INTER_ALL"; + break; + case theory::strings::RegExpInterMode::RE_INTER_CONSTANT: + out << "RegExpInterMode::RE_INTER_CONSTANT"; + break; + case theory::strings::RegExpInterMode::RE_INTER_ONE_CONSTANT: + out << "RegExpInterMode::RE_INTER_ONE_CONSTANT"; + break; + case theory::strings::RegExpInterMode::RE_INTER_NONE: + out << "RegExpInterMode::RE_INTER_NONE"; + break; + default: + out << "RegExpInterMode:UNKNOWN![" << static_cast(mode) << "]"; + } + return out; +} + } // namespace CVC4 diff --git a/src/options/strings_process_loop_mode.h b/src/options/strings_modes.h similarity index 51% rename from src/options/strings_process_loop_mode.h rename to src/options/strings_modes.h index fb2248eec..7823ea8c7 100644 --- a/src/options/strings_process_loop_mode.h +++ b/src/options/strings_modes.h @@ -1,5 +1,5 @@ /********************* */ -/*! \file strings_process_loop_mode.h +/*! \file strings_modes.h ** \verbatim ** Top contributors (to current version): ** Andres Noetzli @@ -9,15 +9,13 @@ ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** - ** \brief Modes for processing looping word equations in the string solver. - ** - ** Modes for processing looping word equations in the string solver. + ** \brief Modes for the string solver. **/ #include "cvc4_private.h" -#ifndef CVC4__THEORY__STRINGS__PROCESS_LOOP_MODE_H -#define CVC4__THEORY__STRINGS__PROCESS_LOOP_MODE_H +#ifndef CVC4__THEORY__STRINGS__STRINGS_MODES_H +#define CVC4__THEORY__STRINGS__STRINGS_MODES_H #include @@ -25,8 +23,8 @@ namespace CVC4 { namespace theory { namespace strings { -/** Enumeration of bit-blasting modes */ -enum class ProcessLoopMode +/** Enumeration of string processing loop modes */ +enum ProcessLoopMode { /** Perform full loop processing. */ FULL, @@ -42,7 +40,29 @@ enum class ProcessLoopMode /** Abort if looping word equations are encountered. */ ABORT -}; // enum ProcessLoopMode +}; // enum ProcessLoopMode + +/** Enumeration of regular expression intersection modes */ +enum RegExpInterMode +{ + /** Compute intersections for all regular expressions. */ + RE_INTER_ALL, + + /** + * Compute intersections only for regular expressions without re.allchar + * and re.range. + */ + RE_INTER_CONSTANT, + + /** + * Compute intersections only between regular expressions where one side does + * not contain re.allchar and re.range. + */ + RE_INTER_ONE_CONSTANT, + + /** Do not compute intersections of regular expressions. */ + RE_INTER_NONE, +}; // enum RegExpInterMode } // namespace strings } // namespace theory @@ -50,6 +70,9 @@ enum class ProcessLoopMode std::ostream& operator<<(std::ostream& out, theory::strings::ProcessLoopMode mode); +std::ostream& operator<<(std::ostream& out, + theory::strings::RegExpInterMode mode); + } // namespace CVC4 -#endif /* CVC4__THEORY__BV__BITBLAST_MODE_H */ +#endif /* CVC4__THEORY__STRINGS__STRINGS_MODES_H */ diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml index fd00b8917..2d8411256 100644 --- a/src/options/strings_options.toml +++ b/src/options/strings_options.toml @@ -135,8 +135,8 @@ header = "options/strings_options.h" type = "CVC4::theory::strings::ProcessLoopMode" default = "CVC4::theory::strings::ProcessLoopMode::FULL" handler = "stringToStringsProcessLoopMode" - includes = ["options/strings_process_loop_mode.h"] - help = "choose how to process looping string equations, see --strings-process-loop-mode=help for details" + includes = ["options/strings_modes.h"] + help = "determines how to process looping string equations" [[option]] name = "stringInferAsLemmas" @@ -216,3 +216,13 @@ header = "options/strings_options.h" default = "true" read_only = true help = "do flat form inferences" + +[[option]] + name = "stringRegExpInterMode" + category = "expert" + long = "re-inter-mode=MODE" + type = "CVC4::theory::strings::RegExpInterMode" + default = "CVC4::theory::strings::RE_INTER_CONSTANT" + handler = "stringToRegExpInterMode" + includes = ["options/strings_modes.h"] + help = "determines which regular expressions intersections to compute" diff --git a/src/smt/smt_engine.cpp b/src/smt/smt_engine.cpp index f75726be2..709df6c9f 100644 --- a/src/smt/smt_engine.cpp +++ b/src/smt/smt_engine.cpp @@ -68,8 +68,8 @@ #include "options/sep_options.h" #include "options/set_language.h" #include "options/smt_options.h" +#include "options/strings_modes.h" #include "options/strings_options.h" -#include "options/strings_process_loop_mode.h" #include "options/theory_options.h" #include "options/uf_options.h" #include "preprocessing/preprocessing_pass.h" diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index f11254794..7286e2fb4 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -47,24 +47,61 @@ RegExpOpr::RegExpOpr() RegExpOpr::~RegExpOpr() {} bool RegExpOpr::checkConstRegExp( Node r ) { - Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with /" << mkString( r ) << "/" << std::endl; - bool ret = true; - if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) { - ret = d_cstre_cache[r]; - } else { - if(r.getKind() == kind::STRING_TO_REGEXP) { - Node tmp = Rewriter::rewrite( r[0] ); - ret = tmp.isConst(); - } else { - for(unsigned i=0; i::iterator it; + std::vector visit; + TNode cur; + visit.push_back(r); + do + { + cur = visit.back(); + visit.pop_back(); + it = d_constCache.find(cur); + + if (it == d_constCache.end()) + { + Kind ck = cur.getKind(); + if (ck == STRING_TO_REGEXP) + { + Node tmp = Rewriter::rewrite(cur[0]); + d_constCache[cur] = + tmp.isConst() ? RE_C_CONRETE_CONSTANT : RE_C_VARIABLE; + } + else if (ck == REGEXP_SIGMA || ck == REGEXP_RANGE) + { + d_constCache[cur] = RE_C_CONSTANT; + } + else + { + d_constCache[cur] = RE_C_UNKNOWN; + visit.push_back(cur); + visit.insert(visit.end(), cur.begin(), cur.end()); + } + } + else if (it->second == RE_C_UNKNOWN) + { + RegExpConstType ret = RE_C_CONRETE_CONSTANT; + for (const Node& cn : cur) + { + it = d_constCache.find(cn); + Assert(it != d_constCache.end()); + if (it->second > ret) + { + ret = it->second; } } + d_constCache[cur] = ret; } - d_cstre_cache[r] = ret; - } - return ret; + } while (!visit.empty()); + Assert(d_constCache.find(r) != d_constCache.end()); + return d_constCache[r]; } // 0-unknown, 1-yes, 2-no diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index 8f9541e91..cb35b37c6 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -33,6 +33,25 @@ namespace CVC4 { namespace theory { namespace strings { +/** + * Information on whether regular expressions contain constants or re.allchar. + * + * The order of this enumeration matters: the larger the value, the more + * possible regular expressions could fit the description. + */ +enum RegExpConstType +{ + // the regular expression doesn't contain variables or re.allchar or re.range + RE_C_CONRETE_CONSTANT, + // the regular expression doesn't contain variables, but may contain + // re.allchar or re.range + RE_C_CONSTANT, + // the regular expression may contain variables + RE_C_VARIABLE, + // the status of the regular expression is unknown (used internally) + RE_C_UNKNOWN, +}; + class RegExpOpr { typedef std::pair< Node, CVC4::String > PairNodeStr; typedef std::set< Node > SetNodes; @@ -58,7 +77,8 @@ class RegExpOpr { std::map d_dv_cache; std::map > d_deriv_cache; std::map > d_compl_cache; - std::map d_cstre_cache; + /** cache mapping regular expressions to whether they contain constants */ + std::unordered_map d_constCache; std::map, std::set > > d_cset_cache; std::map, std::set > > d_fset_cache; std::map d_inter_cache; @@ -96,6 +116,8 @@ class RegExpOpr { * are such that t is a constant (or rewrites to one). */ bool checkConstRegExp( Node r ); + /** get the constant type for regular expression r */ + RegExpConstType getRegExpConstType(Node r); void simplify(Node t, std::vector< Node > &new_nodes, bool polarity); int delta( Node r, Node &exp ); int derivativeS( Node r, CVC4::String c, Node &retNode ); diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp index c50889e78..463891839 100644 --- a/src/theory/strings/regexp_solver.cpp +++ b/src/theory/strings/regexp_solver.cpp @@ -248,13 +248,19 @@ void RegExpSolver::check(const std::map >& mems) bool RegExpSolver::checkEqcIntersect(const std::vector& mems) { + // do not compute intersections if the re intersection mode is none + if (options::stringRegExpInterMode() == RE_INTER_NONE) + { + return true; + } if (mems.empty()) { // nothing to do return true; } - // the initial regular expression membership + // the initial regular expression membership and its constant type Node mi; + RegExpConstType rcti = RE_C_UNKNOWN; NodeManager* nm = NodeManager::currentNM(); for (const Node& m : mems) { @@ -264,15 +270,29 @@ bool RegExpSolver::checkEqcIntersect(const std::vector& mems) Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP); continue; } - if (!d_regexp_opr.checkConstRegExp(m)) + RegExpConstType rct = d_regexp_opr.getRegExpConstType(m); + if (rct == RE_C_VARIABLE + || (options::stringRegExpInterMode() == RE_INTER_CONSTANT + && rct != RE_C_CONRETE_CONSTANT)) { - // cannot do intersection on RE with variables + // cannot do intersection on RE with variables, or with re.allchar based + // on option. continue; } + if (options::stringRegExpInterMode() == RE_INTER_ONE_CONSTANT) + { + if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT) + { + // if both have re.allchar, do not do intersection if the + // RE_INTER_ONE_CONSTANT option is set. + continue; + } + } if (mi.isNull()) { // first regular expression seen mi = m; + rcti = rct; continue; } bool spflag = false; diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index df278ba5a..1b1f87bfc 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1588,6 +1588,7 @@ set(regress_1_tests regress1/strings/nterm-re-inter-sigma.smt2 regress1/strings/pierre150331.smt2 regress1/strings/policy_variable.smt2 + regress1/strings/re-all-char-hard.smt2 regress1/strings/re-agg-total1.smt2 regress1/strings/re-agg-total2.smt2 regress1/strings/re-elim-exact.smt2 diff --git a/test/regress/regress1/strings/re-all-char-hard.smt2 b/test/regress/regress1/strings/re-all-char-hard.smt2 new file mode 100644 index 000000000..2a643ac88 --- /dev/null +++ b/test/regress/regress1/strings/re-all-char-hard.smt2 @@ -0,0 +1,11 @@ +; COMMAND-LINE: --no-re-elim +; EXPECT: sat +(set-info :smt-lib-version 2.6) +(set-logic ALL) +(set-info :status sat) +(declare-fun x () String) + +; we should not intersect these two regular expressions +(assert (str.in.re x (re.++ (str.to.re "abc:def:ghi:") (re.* re.allchar ) (str.to.re ":") (re.* re.allchar ) (str.to.re ":cluster/sflower-bottle-guide-") (re.* re.allchar )))) +(assert (str.in.re x (re.++ (re.* re.allchar ) (str.to.re ":") (re.* re.allchar ) (str.to.re ":") (re.* re.allchar ) (str.to.re ":") (re.* re.allchar ) (str.to.re ":") (re.* re.allchar ) (str.to.re ":") (re.* re.allchar )))) +(check-sat) -- 2.30.2