From: Andrew Reynolds Date: Thu, 30 Aug 2018 16:57:58 +0000 (-0500) Subject: Add regular expression elimination module (#2400) X-Git-Tag: cvc5-1.0.0~4697 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=bc0c0b8b9ea77e8e4e328dbe66a4582fa7883eda;p=cvc5.git Add regular expression elimination module (#2400) --- diff --git a/src/Makefile.am b/src/Makefile.am index 7231820f8..1a55d3810 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -582,6 +582,8 @@ libcvc4_la_SOURCES = \ theory/sets/theory_sets_rewriter.h \ theory/sets/theory_sets_type_enumerator.h \ theory/sets/theory_sets_type_rules.h \ + theory/strings/regexp_elim.cpp \ + theory/strings/regexp_elim.h \ theory/strings/regexp_operation.cpp \ theory/strings/regexp_operation.h \ theory/strings/theory_strings.cpp \ diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml index 3498b6183..c48583bb2 100644 --- a/src/options/strings_options.toml +++ b/src/options/strings_options.toml @@ -209,6 +209,22 @@ header = "options/strings_options.h" read_only = true help = "do length propagation based on constant splits" +[[option]] + name = "regExpElim" + category = "regular" + long = "re-elim" + type = "bool" + default = "false" + help = "elimination techniques for regular expressions" + +[[option]] + name = "regExpElimAgg" + category = "regular" + long = "re-elim-agg" + type = "bool" + default = "false" + help = "aggressive elimination techniques for regular expressions" + [[option]] name = "stringFlatForms" category = "regular" diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp new file mode 100644 index 000000000..7d65a872c --- /dev/null +++ b/src/theory/strings/regexp_elim.cpp @@ -0,0 +1,446 @@ +/********************* */ +/*! \file regexp_elim.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of techniques for eliminating regular expressions + ** + **/ + +#include "theory/strings/regexp_elim.h" + +#include "options/strings_options.h" +#include "theory/strings/theory_strings_rewriter.h" + +using namespace CVC4; +using namespace CVC4::kind; +using namespace CVC4::theory; +using namespace CVC4::theory::strings; + +RegExpElimination::RegExpElimination() +{ + d_zero = NodeManager::currentNM()->mkConst(Rational(0)); + d_one = NodeManager::currentNM()->mkConst(Rational(1)); + d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1)); +} + +Node RegExpElimination::eliminate(Node atom) +{ + Assert(atom.getKind() == STRING_IN_REGEXP); + if (atom[1].getKind() == REGEXP_CONCAT) + { + return eliminateConcat(atom); + } + else if (atom[1].getKind() == REGEXP_STAR) + { + return eliminateStar(atom); + } + return Node::null(); +} + +Node RegExpElimination::eliminateConcat(Node atom) +{ + NodeManager* nm = NodeManager::currentNM(); + Node x = atom[0]; + Node lenx = nm->mkNode(STRING_LENGTH, x); + Node re = atom[1]; + // memberships of the form x in re.++ * s1 * ... * sn *, where * are + // any number of repetitions (exact or indefinite) of re.allchar. + Trace("re-elim-debug") << "Try re concat with gaps " << atom << std::endl; + std::vector children; + TheoryStringsRewriter::getConcat(re, children); + bool onlySigmasAndConsts = true; + std::vector sep_children; + std::vector gap_minsize; + std::vector gap_exact; + // the first gap is initially strict zero + gap_minsize.push_back(0); + gap_exact.push_back(true); + for (const Node& c : children) + { + Trace("re-elim-debug") << " " << c << std::endl; + onlySigmasAndConsts = false; + if (c.getKind() == STRING_TO_REGEXP) + { + onlySigmasAndConsts = true; + sep_children.push_back(c[0]); + // the next gap is initially strict zero + gap_minsize.push_back(0); + gap_exact.push_back(true); + } + else if (c.getKind() == REGEXP_STAR && c[0].getKind() == REGEXP_SIGMA) + { + // found a gap of any size + onlySigmasAndConsts = true; + gap_exact[gap_exact.size() - 1] = false; + } + else if (c.getKind() == REGEXP_SIGMA) + { + // add one to the minimum size of the gap + onlySigmasAndConsts = true; + gap_minsize[gap_minsize.size() - 1]++; + } + if (!onlySigmasAndConsts) + { + Trace("re-elim-debug") << "...cannot handle " << c << std::endl; + break; + } + } + // we should always rewrite concatenations that are purely re.allchar + // and re.*( re.allchar ). + Assert(!onlySigmasAndConsts || !sep_children.empty()); + if (onlySigmasAndConsts && !sep_children.empty()) + { + bool canProcess = true; + std::vector conj; + // The following constructs a set of constraints that encodes that a + // set of string terms are found, in order, in string x. + // prev_end stores the current (symbolic) index in x that we are + // searching. + Node prev_end = d_zero; + unsigned gap_minsize_end = gap_minsize.back(); + bool gap_exact_end = gap_exact.back(); + std::vector non_greedy_find_vars; + for (unsigned i = 0, size = sep_children.size(); i < size; i++) + { + Node sc = sep_children[i]; + if (gap_minsize[i] > 0) + { + // the gap to this child is at least gap_minsize[i] + prev_end = + nm->mkNode(PLUS, prev_end, nm->mkConst(Rational(gap_minsize[i]))); + } + Node lensc = nm->mkNode(STRING_LENGTH, sc); + if (gap_exact[i]) + { + // if the gap is exact, it is a substring constraint + Node curr = prev_end; + Node ss = nm->mkNode(STRING_SUBSTR, x, curr, lensc); + conj.push_back(ss.eqNode(sc)); + prev_end = nm->mkNode(PLUS, curr, lensc); + } + else + { + // otherwise, we can use indexof to represent some next occurrence + if (gap_exact[i + 1] && i + 1 != size) + { + if (!options::regExpElimAgg()) + { + canProcess = false; + break; + } + // if the gap after this one is strict, we need a non-greedy find + // thus, we add a symbolic constant + Node k = nm->mkBoundVar(nm->integerType()); + non_greedy_find_vars.push_back(k); + prev_end = nm->mkNode(PLUS, prev_end, k); + } + Node curr = nm->mkNode(STRING_STRIDOF, x, sc, prev_end); + Node idofFind = curr.eqNode(d_neg_one).negate(); + conj.push_back(idofFind); + prev_end = nm->mkNode(PLUS, curr, lensc); + } + } + + if (canProcess) + { + // since sep_children is non-empty, conj is non-empty + Assert(!conj.empty()); + // Process the last gap, if necessary. + // Notice that if the last gap is not exact and its minsize is zero, + // then the last indexof/substr constraint entails the following + // constraint, so it is not necessary to add. + if (gap_minsize_end > 0 || gap_exact_end) + { + Node fit = nm->mkNode( + gap_exact_end ? EQUAL : LEQ, + nm->mkNode(PLUS, prev_end, nm->mkConst(Rational(gap_minsize_end))), + lenx); + conj.push_back(fit); + } + Node res = conj.size() == 1 ? conj[0] : nm->mkNode(AND, conj); + // process the non-greedy find variables + if (!non_greedy_find_vars.empty()) + { + std::vector children; + for (const Node& v : non_greedy_find_vars) + { + Node bound = nm->mkNode( + AND, nm->mkNode(LEQ, d_zero, v), nm->mkNode(LT, v, lenx)); + children.push_back(bound); + } + children.push_back(res); + Node body = nm->mkNode(AND, children); + Node bvl = nm->mkNode(BOUND_VAR_LIST, non_greedy_find_vars); + res = nm->mkNode(EXISTS, bvl, body); + } + // e.g., writing "A" for (str.to.re "A") and _ for re.allchar: + // x in (re.++ "A" (re.* _) "B" (re.* _)) ---> + // substr(x,0,1)="A" ^ indexof(x,"B",1)!=-1 + // x in (re.++ (re.* _) "A" _ _ _ (re.* _) "B" _ _ (re.* _)) ---> + // indexof(x,"A",0)!=-1 ^ + // indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 ) != -1 ^ + // indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x) + + // An example of a non-greedy find: + // x in re.++( re.*( _ ), "A", _, "B", re.*( _ ) ) ---> + // exists k. 0 <= k < len( x ) ^ + // indexof( x, "A", k ) != -1 ^ + // substr( x, indexof( x, "A", k )+2, 1 ) = "B" + return returnElim(atom, res, "concat-with-gaps"); + } + } + + if (!options::regExpElimAgg()) + { + return Node::null(); + } + // only aggressive rewrites below here + + // if the first or last child is constant string, we can split the membership + // into a conjunction of two memberships. + Node sStartIndex = d_zero; + Node sLength = lenx; + std::vector sConstraints; + std::vector rexpElimChildren; + unsigned nchildren = children.size(); + Assert(nchildren > 1); + for (unsigned r = 0; r < 2; r++) + { + unsigned index = r == 0 ? 0 : nchildren - 1; + Assert(children[index + (r == 0 ? 1 : -1)].getKind() != STRING_TO_REGEXP); + Node c = children[index]; + if (c.getKind() == STRING_TO_REGEXP) + { + Node s = c[0]; + Node lens = nm->mkNode(STRING_LENGTH, s); + Node sss = r == 0 ? d_zero : nm->mkNode(MINUS, lenx, lens); + Node ss = nm->mkNode(STRING_SUBSTR, x, sss, lens); + sConstraints.push_back(ss.eqNode(s)); + if (r == 0) + { + sStartIndex = lens; + } + sLength = nm->mkNode(MINUS, sLength, lens); + } + if (r == 1 && !sConstraints.empty()) + { + // add the middle children + for (unsigned i = 1; i < (nchildren - 1); i++) + { + rexpElimChildren.push_back(children[i]); + } + } + if (c.getKind() != STRING_TO_REGEXP) + { + rexpElimChildren.push_back(c); + } + } + Assert(rexpElimChildren.size() + sConstraints.size() == nchildren); + if (!sConstraints.empty()) + { + Node ss = nm->mkNode(STRING_SUBSTR, x, sStartIndex, sLength); + Assert(!rexpElimChildren.empty()); + Node regElim = + TheoryStringsRewriter::mkConcat(REGEXP_CONCAT, rexpElimChildren); + sConstraints.push_back(nm->mkNode(STRING_IN_REGEXP, ss, regElim)); + Node ret = nm->mkNode(AND, sConstraints); + // e.g. + // x in re.++( "A", R ) ---> substr(x,0,1)="A" ^ substr(x,1,len(x)-1) in R + return returnElim(atom, ret, "concat-splice"); + } + Assert(nchildren > 1); + for (unsigned i = 0; i < nchildren; i++) + { + if (children[i].getKind() == STRING_TO_REGEXP) + { + Node s = children[i][0]; + Node lens = nm->mkNode(STRING_LENGTH, s); + // there exists an index in this string such that the substring is this + Node k; + std::vector echildren; + if (i == 0) + { + k = d_zero; + } + else if (i + 1 == nchildren) + { + k = nm->mkNode(MINUS, lenx, lens); + } + else + { + k = nm->mkBoundVar(nm->integerType()); + Node bound = + nm->mkNode(AND, + nm->mkNode(LEQ, d_zero, k), + nm->mkNode(LT, k, nm->mkNode(MINUS, lenx, lens))); + echildren.push_back(bound); + } + Node substrEq = nm->mkNode(STRING_SUBSTR, x, k, lens).eqNode(s); + echildren.push_back(substrEq); + if (i > 0) + { + std::vector rprefix; + rprefix.insert(rprefix.end(), children.begin(), children.begin() + i); + Node rpn = TheoryStringsRewriter::mkConcat(REGEXP_CONCAT, rprefix); + Node substrPrefix = nm->mkNode( + STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, d_zero, k), rpn); + echildren.push_back(substrPrefix); + } + if (i + 1 < nchildren) + { + std::vector rsuffix; + rsuffix.insert(rsuffix.end(), children.begin() + i + 1, children.end()); + Node rps = TheoryStringsRewriter::mkConcat(REGEXP_CONCAT, rsuffix); + Node ks = nm->mkNode(PLUS, k, lens); + Node substrSuffix = nm->mkNode( + STRING_IN_REGEXP, + nm->mkNode(STRING_SUBSTR, x, ks, nm->mkNode(MINUS, lenx, ks)), + rps); + echildren.push_back(substrSuffix); + } + Node body = nm->mkNode(AND, echildren); + if (k.getKind() == BOUND_VARIABLE) + { + Node bvl = nm->mkNode(BOUND_VAR_LIST, k); + body = nm->mkNode(EXISTS, bvl, body); + } + // e.g. x in re.++( R1, "AB", R2 ) ---> + // exists k. + // 0 <= k <= (len(x)-2) ^ + // substr( x, k, 2 ) = "AB" ^ + // substr( x, 0, k ) in R1 ^ + // substr( x, k+2, len(x)-(k+2) ) in R2 + return returnElim(atom, body, "concat-find"); + } + } + return Node::null(); +} + +Node RegExpElimination::eliminateStar(Node atom) +{ + if (!options::regExpElimAgg()) + { + return Node::null(); + } + // only aggressive rewrites below here + + NodeManager* nm = NodeManager::currentNM(); + Node x = atom[0]; + Node lenx = nm->mkNode(STRING_LENGTH, x); + Node re = atom[1]; + // for regular expression star, + // if the period is a fixed constant, we can turn it into a bounded + // quantifier + std::vector disj; + if (re[0].getKind() == REGEXP_UNION) + { + for (const Node& r : re[0]) + { + disj.push_back(r); + } + } + else + { + disj.push_back(re[0]); + } + bool lenOnePeriod = true; + std::vector char_constraints; + Node index = nm->mkBoundVar(nm->integerType()); + Node substr_ch = nm->mkNode(STRING_SUBSTR, x, index, d_one); + substr_ch = Rewriter::rewrite(substr_ch); + // handle the case where it is purely characters + for (const Node& r : disj) + { + Assert(r.getKind() != REGEXP_UNION); + Assert(r.getKind() != REGEXP_SIGMA); + lenOnePeriod = false; + // lenOnePeriod is true if this regular expression is a single character + // regular expression + if (r.getKind() == STRING_TO_REGEXP) + { + Node s = r[0]; + if (s.isConst() && s.getConst().size() == 1) + { + lenOnePeriod = true; + } + } + else if (r.getKind() == REGEXP_RANGE) + { + lenOnePeriod = true; + } + if (!lenOnePeriod) + { + break; + } + else + { + Node regexp_ch = nm->mkNode(STRING_IN_REGEXP, substr_ch, r); + regexp_ch = Rewriter::rewrite(regexp_ch); + Assert(regexp_ch.getKind() != STRING_IN_REGEXP); + char_constraints.push_back(regexp_ch); + } + } + if (lenOnePeriod) + { + Assert(!char_constraints.empty()); + Node bound = nm->mkNode( + AND, nm->mkNode(LEQ, d_zero, index), nm->mkNode(LT, index, lenx)); + Node conc = char_constraints.size() == 1 ? char_constraints[0] + : nm->mkNode(OR, char_constraints); + Node body = nm->mkNode(OR, bound.negate(), conc); + Node bvl = nm->mkNode(BOUND_VAR_LIST, index); + Node res = nm->mkNode(FORALL, bvl, body); + // e.g. + // x in (re.* (re.union "A" "B" )) ---> + // forall k. 0<=k (substr(x,k,1) in "A" OR substr(x,k,1) in "B") + return returnElim(atom, res, "star-char"); + } + // otherwise, for stars of constant length these are periodic + if (disj.size() == 1) + { + Node r = disj[0]; + if (r.getKind() == STRING_TO_REGEXP) + { + Node s = r[0]; + if (s.isConst()) + { + Node lens = nm->mkNode(STRING_LENGTH, s); + lens = Rewriter::rewrite(lens); + Assert(lens.isConst()); + std::vector conj; + Node bound = nm->mkNode( + AND, + nm->mkNode(LEQ, d_zero, index), + nm->mkNode(LT, index, nm->mkNode(INTS_DIVISION, lenx, lens))); + Node conc = + nm->mkNode(STRING_SUBSTR, x, nm->mkNode(MULT, index, lens), lens) + .eqNode(s); + Node body = nm->mkNode(OR, bound.negate(), conc); + Node bvl = nm->mkNode(BOUND_VAR_LIST, index); + Node res = nm->mkNode(FORALL, bvl, body); + res = nm->mkNode( + AND, nm->mkNode(INTS_MODULUS, lenx, lens).eqNode(d_zero), res); + // e.g. + // x in ("abc")* ---> + // forall k. 0 <= k < (len( x ) div 3) => substr(x,3*k,3) = "abc" ^ + // len(x) mod 3 = 0 + return returnElim(atom, res, "star-constant"); + } + } + } + return Node::null(); +} + +Node RegExpElimination::returnElim(Node atom, Node atomElim, const char* id) +{ + Trace("re-elim") << "re-elim: " << atom << " to " << atomElim << " by " << id + << "." << std::endl; + return atomElim; +} diff --git a/src/theory/strings/regexp_elim.h b/src/theory/strings/regexp_elim.h new file mode 100644 index 000000000..eddf33e71 --- /dev/null +++ b/src/theory/strings/regexp_elim.h @@ -0,0 +1,66 @@ +/********************* */ +/*! \file regexp_elim.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Techniques for eliminating regular expressions + ** + **/ + +#include "cvc4_private.h" +#ifndef __CVC4__THEORY__STRINGS__REGEXP_ELIM_H +#define __CVC4__THEORY__STRINGS__REGEXP_ELIM_H + +#include "expr/node.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** Regular expression membership elimination + * + * This class implements techniques for reducing regular expression memberships + * to bounded integer quantifiers + extended function constraints. + * + * It is used by TheoryStrings during ppRewrite. + */ +class RegExpElimination +{ + public: + RegExpElimination(); + /** eliminate membership + * + * This method takes as input a regular expression membership atom of the + * form (str.in.re x R). If this method returns a non-null node ret, then ret + * is equivalent to atom. + */ + Node eliminate(Node atom); + + private: + /** common terms */ + Node d_zero; + Node d_one; + Node d_neg_one; + /** return elimination + * + * This method is called when atom is rewritten to atomElim, and returns + * atomElim. id is an identifier indicating the reason for the elimination. + */ + Node returnElim(Node atom, Node atomElim, const char* id); + /** elimination for regular expression concatenation */ + Node eliminateConcat(Node atom); + /** elimination for regular expression star */ + Node eliminateStar(Node atom); +}; /* class RegExpElimination */ + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* __CVC4__THEORY__STRINGS__REGEXP_ELIM_H */ diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 72e82edca..35835398c 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -793,7 +793,6 @@ Node TheoryStrings::expandDefinition(LogicRequest &logicRequest, Node node) { return node; } - void TheoryStrings::check(Effort e) { if (done() && e " << atomElim + << " via regular expression elimination." + << std::endl; + atom = atomElim; + } + } if( !options::stringLazyPreproc() ){ //eager preprocess here std::vector< Node > new_nodes; diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 2c0cb42aa..c1bb1e0a0 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -22,6 +22,7 @@ #include "context/cdhashset.h" #include "context/cdlist.h" #include "expr/attribute.h" +#include "theory/strings/regexp_elim.h" #include "theory/strings/regexp_operation.h" #include "theory/strings/theory_strings_preprocess.h" #include "theory/theory.h" @@ -618,10 +619,10 @@ private: NodeSet d_processed_memberships; // antecedant for why regexp membership must be true NodeNodeMap d_regexp_ant; - // membership length - //std::map< Node, bool > d_membership_length; - // regular expression operations + /** regular expression operation module */ RegExpOpr d_regexp_opr; + /** regular expression elimination module */ + RegExpElimination d_regexp_elim; CVC4::String getHeadConst( Node x ); bool deriveRegExp( Node x, Node r, Node ant ); diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 1c68097ae..354115850 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -917,24 +917,45 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) { } }else if( r.getKind() == kind::REGEXP_CONCAT ){ bool allSigma = true; + bool allSigmaStrict = true; + unsigned allSigmaMinSize = 0; bool allString = true; std::vector< Node > cc; - for(unsigned i=0; imkConst( ::CVC4::Rational( r.getNumChildren() ) ); - retNode = num.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x)); - }else if( allString ){ - retNode = x.eqNode( mkConcat( kind::STRING_CONCAT, cc ) ); + if (allSigma) + { + Node num = nm->mkConst(Rational(allSigmaMinSize)); + Node lenx = nm->mkNode(STRING_LENGTH, x); + retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num); + return returnRewrite(node, retNode, "re-concat-pure-allchar"); + } + else if (allString) + { + retNode = x.eqNode(mkConcat(STRING_CONCAT, cc)); + return returnRewrite(node, retNode, "re-concat-pure-str"); } }else if( r.getKind()==kind::REGEXP_INTER || r.getKind()==kind::REGEXP_UNION ){ std::vector< Node > mvec; diff --git a/test/regress/Makefile.tests b/test/regress/Makefile.tests index 6187cb2fa..63593c662 100644 --- a/test/regress/Makefile.tests +++ b/test/regress/Makefile.tests @@ -814,6 +814,7 @@ REG0_TESTS = \ regress0/strings/norn-31.smt2 \ regress0/strings/norn-simp-rew.smt2 \ regress0/strings/repl-rewrites2.smt2 \ + regress0/strings/rewrites-re-concat.smt2 \ regress0/strings/rewrites-v2.smt2 \ regress0/strings/std2.6.1.smt2 \ regress0/strings/str003.smt2 \ @@ -1485,6 +1486,7 @@ REG1_TESTS = \ regress1/strings/idof-triv.smt2 \ regress1/strings/ilc-l-nt.smt2 \ regress1/strings/issue1105.smt2 \ + regress1/strings/issue1684-regex.smt2 \ regress1/strings/issue2060.smt2 \ regress1/strings/kaluza-fl.smt2 \ regress1/strings/loop002.smt2 \ @@ -1496,12 +1498,16 @@ REG1_TESTS = \ regress1/strings/loop008.smt2 \ regress1/strings/loop009.smt2 \ regress1/strings/nf-ff-contains-abs.smt2 \ + regress1/strings/non_termination_regular_expression4.smt2 \ + regress1/strings/norn-13.smt2 \ regress1/strings/norn-360.smt2 \ regress1/strings/norn-ab.smt2 \ regress1/strings/norn-nel-bug-052116.smt2 \ regress1/strings/norn-simp-rew-sat.smt2 \ regress1/strings/nterm-re-inter-sigma.smt2 \ regress1/strings/pierre150331.smt2 \ + regress1/strings/policy_variable.smt2 \ + regress1/strings/re-elim-exact.smt2 \ regress1/strings/re-unsound-080718.smt2 \ regress1/strings/regexp001.smt2 \ regress1/strings/regexp002.smt2 \ diff --git a/test/regress/regress0/strings/rewrites-re-concat.smt2 b/test/regress/regress0/strings/rewrites-re-concat.smt2 new file mode 100644 index 000000000..fe6691e73 --- /dev/null +++ b/test/regress/regress0/strings/rewrites-re-concat.smt2 @@ -0,0 +1,21 @@ +(set-info :smt-lib-version 2.5) +(set-logic SLIA) +(set-info :status unsat) + +(declare-fun x () String) +(assert (str.in.re x (re.++ (str.to.re "baa") (re.* (str.to.re "a")) (re.* (str.to.re "c"))))) + +(declare-fun y () String) +(assert (< (str.len y) 2)) + +(assert (or +(not (str.in.re x (re.++ (str.to.re "baa") (re.* (str.to.re "a")) (re.* (str.to.re "a")) (re.* (str.to.re "c"))))) +(not (str.in.re x (re.++ (str.to.re "ba") (str.to.re "") (re.* (str.to.re "a")) (str.to.re "a") (re.* (str.to.re "c"))))) +(not (str.in.re x (re.++ (str.to.re "b") (re.* (str.to.re "a")) (str.to.re "a") (re.* (str.to.re "a")) (str.to.re "a") (re.* (str.to.re "c"))))) + +(str.in.re y (re.++ re.allchar re.allchar (re.* re.allchar) re.allchar)) +(str.in.re y (re.++ re.allchar re.allchar re.allchar)) +) +) + +(check-sat) diff --git a/test/regress/regress1/strings/issue1684-regex.smt2 b/test/regress/regress1/strings/issue1684-regex.smt2 new file mode 100644 index 000000000..35eb8456d --- /dev/null +++ b/test/regress/regress1/strings/issue1684-regex.smt2 @@ -0,0 +1,8 @@ +(set-info :smt-lib-version 2.5) +(set-logic QF_S) +(set-info :status sat) +(set-option :strings-exp true) +(declare-const s String) +(assert (str.in.re s (re.* (re.range "\x00" "\xFF")))) +(assert (str.in.re s (re.range "\x00" "\xFF"))) +(check-sat) \ No newline at end of file diff --git a/test/regress/regress1/strings/non_termination_regular_expression4.smt2 b/test/regress/regress1/strings/non_termination_regular_expression4.smt2 new file mode 100644 index 000000000..0b527345d --- /dev/null +++ b/test/regress/regress1/strings/non_termination_regular_expression4.smt2 @@ -0,0 +1,36 @@ +(set-info :smt-lib-version 2.5) +(set-logic ALL) +(set-info :status unsat) +(set-option :strings-exp true) +(set-option :re-elim true) +(declare-const actionName String) +(declare-const actionNamespace String) + +; Action: p0.0 +(declare-const p0.0.action Bool) +(assert (= p0.0.action (and (= actionNamespace "foobar") (str.in.re actionName (re.++ (str.to.re "foo") (re.* re.allchar) (re.++ (str.to.re "") re.allchar (str.to.re "bar"))))))) + +; Policy: 0 +(declare-const p0.denies Bool) +(assert (not p0.denies)) +(declare-const p0.allows Bool) +(assert (= p0.allows (and (not p0.denies) p0.0.action))) +(declare-const p0.neutral Bool) +(assert (= p0.neutral (and (not p0.allows) (not p0.denies)))) + +; Action: p1.0 +(declare-const p1.0.action Bool) +(assert (= p1.0.action (and (= actionNamespace "foobar") (str.in.re actionName (re.++ (re.++ (str.to.re "foo") re.allchar (str.to.re "")) (re.* re.allchar) (str.to.re "bar")))))) + +; Policy: 1 +(declare-const p1.denies Bool) +(assert (not p1.denies)) +(declare-const p1.allows Bool) +(assert (= p1.allows (and (not p1.denies) p1.0.action))) +(declare-const p1.neutral Bool) +(assert (= p1.neutral (and (not p1.allows) (not p1.denies)))) + +; Goals +(assert p0.allows) +(assert (or p1.denies p1.neutral)) +(check-sat) diff --git a/test/regress/regress1/strings/norn-13.smt2 b/test/regress/regress1/strings/norn-13.smt2 new file mode 100644 index 000000000..912f4477a --- /dev/null +++ b/test/regress/regress1/strings/norn-13.smt2 @@ -0,0 +1,23 @@ +(set-logic QF_S) +(set-option :strings-exp true) +(set-info :status sat) + +(declare-fun var_0 () String) +(declare-fun var_1 () String) +(declare-fun var_2 () String) +(declare-fun var_3 () String) +(declare-fun var_4 () String) +(declare-fun var_5 () String) +(declare-fun var_6 () String) +(declare-fun var_7 () String) +(declare-fun var_8 () String) +(declare-fun var_9 () String) +(declare-fun var_10 () String) +(declare-fun var_11 () String) +(declare-fun var_12 () String) + +(assert (str.in.re "" (re.* (re.range "a" "u")))) +(assert (str.in.re var_4 (re.* (re.range "a" "u")))) +(assert (str.in.re var_4 (re.* (re.union (str.to.re "a") (str.to.re "b"))))) +(assert (not (str.in.re (str.++ var_4 "z" ) (re.* (str.to.re "z"))))) +(check-sat) diff --git a/test/regress/regress1/strings/policy_variable.smt2 b/test/regress/regress1/strings/policy_variable.smt2 new file mode 100644 index 000000000..4d14e95d5 --- /dev/null +++ b/test/regress/regress1/strings/policy_variable.smt2 @@ -0,0 +1,40 @@ +(set-info :smt-lib-version 2.5) +(set-logic ALL) +(set-info :status unsat) +(set-option :strings-exp true) +(set-option :re-elim true) +(declare-const actionName String) +(declare-const actionNamespace String) +(declare-const example_key String) + +; Action: p0.0 +(declare-const p0.0.action Bool) +(assert (= p0.0.action (and (= "foobar" actionNamespace) (and (str.prefixof "ab" actionName) (str.contains (str.substr actionName 2 (- (str.len actionName) 3)) "b") (str.suffixof "b" actionName) (not (= actionName "abb")) (not (= actionName "ab")))))) + +; Policy: 0 +(declare-const p0.denies Bool) +(assert (not p0.denies)) +(declare-const p0.allows Bool) +(assert (= p0.allows (and (not p0.denies) p0.0.action))) +(declare-const p0.neutral Bool) +(assert (= p0.neutral (and (not p0.allows) (not p0.denies)))) + +; Action: p1.0 +(declare-const p1.0.action Bool) +(assert (= p1.0.action (and (= "foobar" actionNamespace) (and (str.prefixof "a" actionName) (str.prefixof example_key (str.substr actionName 1 (- (str.len actionName) 1))) (str.contains (str.substr actionName (+ 1 (str.len example_key)) (- (str.len actionName) 1 (str.len example_key))) "b") (str.suffixof "b" actionName))))) + +(declare-const p1.0.condition Bool) +(assert (= p1.0.condition (str.prefixof "bb" example_key))) + +; Policy: 1 +(declare-const p1.denies Bool) +(assert (not p1.denies)) +(declare-const p1.allows Bool) +(assert (= p1.allows (and (not p1.denies) p1.0.action p1.0.condition))) +(declare-const p1.neutral Bool) +(assert (= p1.neutral (and (not p1.allows) (not p1.denies)))) + +; Goals +(assert (or p0.neutral p0.denies)) +(assert p1.allows) +(check-sat) diff --git a/test/regress/regress1/strings/re-elim-exact.smt2 b/test/regress/regress1/strings/re-elim-exact.smt2 new file mode 100644 index 000000000..62b934610 --- /dev/null +++ b/test/regress/regress1/strings/re-elim-exact.smt2 @@ -0,0 +1,10 @@ +(set-info :smt-lib-version 2.5) +(set-logic ALL) +(set-info :status sat) +(set-option :strings-exp true) +(set-option :re-elim true) +(declare-fun x () String) + +(assert (str.in.re x (re.++ re.allchar (str.to.re "A") re.allchar (str.to.re "B") re.allchar))) + +(check-sat)