From 97c2553d0b0535bd47517f755897c441e223568e Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Thu, 5 Dec 2019 06:39:32 -0800 Subject: [PATCH] Bi-directional unrolling of R* regular expressions (#3532) --- src/theory/strings/regexp_operation.cpp | 38 ++++++++++++++----- test/regress/CMakeLists.txt | 1 + test/regress/regress0/strings/bidir_star.smt2 | 8 ++++ 3 files changed, 37 insertions(+), 10 deletions(-) create mode 100644 test/regress/regress0/strings/bidir_star.smt2 diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index aaf1c864f..8707593c7 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -1168,18 +1168,36 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes } else if(r[0].getKind() == kind::REGEXP_SIGMA) { conc = d_true; } else { + NodeManager* nm = NodeManager::currentNM(); Node se = s.eqNode(d_emptyString); - Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]); - Node sk1 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" ); - Node sk2 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" ); - Node s1nz = sk1.eqNode(d_emptyString).negate(); - Node s2nz = sk2.eqNode(d_emptyString).negate(); - Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]); - Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r); - Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2)); + Node sinr = nm->mkNode(kind::STRING_IN_REGEXP, s, r[0]); + Node sk1 = nm->mkSkolem( + "rs", s.getType(), "created for regular expression star"); + Node sk2 = nm->mkSkolem( + "rs", s.getType(), "created for regular expression star"); + Node sk3 = nm->mkSkolem( + "rs", s.getType(), "created for regular expression star"); - conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs); - conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc); + NodeBuilder<> nb(kind::AND); + nb << sk1.eqNode(d_emptyString).negate(); + nb << sk3.eqNode(d_emptyString).negate(); + nb << nm->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]); + nb << nm->mkNode(kind::STRING_IN_REGEXP, sk2, r); + nb << nm->mkNode(kind::STRING_IN_REGEXP, sk3, r[0]); + nb << s.eqNode(nm->mkNode(kind::STRING_CONCAT, sk1, sk2, sk3)); + conc = nb; + + // We unfold `x in R*` by considering three cases: `x` is empty, `x` + // is matched by `R`, or `x` is matched by two or more `R`s. For the + // last case, we break `x` into three pieces, making the beginning + // and the end each match `R` and the middle match `R*`. Matching the + // beginning and the end with `R` allows us to reason about the + // beginning and the end of `x` simultaneously. + // + // x in R* ---> (x = "") v (x in R) v + // (x = x1 ++ x2 ++ x3 ^ x1 != "" ^ x3 != "" ^ + // x1 in R ^ x2 in R* ^ x3 in R) + conc = nm->mkNode(kind::OR, se, sinr, conc); } break; } diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 911943c64..90038872f 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -858,6 +858,7 @@ set(regress_0_tests regress0/smtlib/reset-force-logic.smt2 regress0/smtlib/reset-set-logic.smt2 regress0/smtlib/set-info-status.smt2 + regress0/strings/bidir_star.smt2 regress0/strings/bug001.smt2 regress0/strings/bug002.smt2 regress0/strings/bug612.smt2 diff --git a/test/regress/regress0/strings/bidir_star.smt2 b/test/regress/regress0/strings/bidir_star.smt2 new file mode 100644 index 000000000..7303d138f --- /dev/null +++ b/test/regress/regress0/strings/bidir_star.smt2 @@ -0,0 +1,8 @@ +; COMMAND-LINE: --strings-exp +(set-logic SLIA) +(declare-fun a () String) +(assert (>= (str.len a) 2)) +(assert (str.in.re a (re.+ (re.range "0" "1")))) +(assert (<= 3 (str.to.int (str.substr a (+ (- 2) (str.len a)) 1)))) +(set-info :status unsat) +(check-sat) -- 2.30.2