Optimize regular expression elimination (#2612)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Wed, 10 Oct 2018 20:29:36 +0000 (15:29 -0500)
committer GitHub <noreply@github.com>
Wed, 10 Oct 2018 20:29:36 +0000 (15:29 -0500)
src/theory/strings/regexp_elim.cpp
test/regress/CMakeLists.txt
test/regress/Makefile.tests
test/regress/regress2/strings/non_termination_regular_expression6.smt2 [new file with mode: 0644]

index 8ea26fca955a573456fb1b2c6c99b62b367a37db..0310e46207afc7f14fc85b8b195f2f1fa30b01e9 100644 (file)
@@ -104,18 +104,21 @@ Node RegExpElimination::eliminateConcat(Node atom)
     // prev_end stores the current (symbolic) index in x that we are
     // searching.
     Node prev_end = d_zero;
+    // the symbolic index we start searching, for each child in sep_children.
+    std::vector<Node> prev_ends;
     unsigned gap_minsize_end = gap_minsize.back();
     bool gap_exact_end = gap_exact.back();
     std::vector<Node> non_greedy_find_vars;
     for (unsigned i = 0, size = sep_children.size(); i < size; i++)
     {
-      Node sc = sep_children[i];
       if (gap_minsize[i] > 0)
       {
         // the gap to this child is at least gap_minsize[i]
         prev_end =
             nm->mkNode(PLUS, prev_end, nm->mkConst(Rational(gap_minsize[i])));
       }
+      prev_ends.push_back(prev_end);
+      Node sc = sep_children[i];
       Node lensc = nm->mkNode(STRING_LENGTH, sc);
       if (gap_exact[i])
       {
@@ -169,7 +172,6 @@ Node RegExpElimination::eliminateConcat(Node atom)
         Node lenSc = nm->mkNode(STRING_LENGTH, sc);
         Node loc = nm->mkNode(MINUS, lenx, nm->mkNode(PLUS, lenSc, cEnd));
         Node scc = sc.eqNode(nm->mkNode(STRING_SUBSTR, x, loc, lenSc));
-        conj.push_back(scc);
         // We also must ensure that we fit. This constraint is necessary in
         // addition to the constraint above. Take this example:
         //     x in (re.++ "A" _ (re.* _) "B" _) --->
@@ -182,9 +184,23 @@ Node RegExpElimination::eliminateConcat(Node atom)
         // would have been the case then "ABB" would be a model for x, where
         // the second constraint refers to the third position, and the third
         // constraint refers to the second position.
+        //
+        // With respect to the above example, the following is an optimization.
+        // For that example, we instead produce:
+        //     x in (re.++ "A" _ (re.* _) "B" _) --->
+        //       substr( x, 0, 1 ) = "A" ^          // find "A"
+        //       substr( x, len(x)-2, 1 ) = "B" ^   // "B" is at end - 2
+        //       2 <= len( x ) - 2
+        // The intuition is that above, there are two constraints that insist
+        // that "B" is found, whereas we only need one. The last constraint
+        // above says that the "B" we find at end-2 can be found >=1 after
+        // the "A".
+        conj.pop_back();
         Node fit = nm->mkNode(gap_exact[sep_children.size() - 1] ? EQUAL : LEQ,
-                              nm->mkNode(MINUS, prev_end, lenSc),
+                              prev_ends.back(),
                               loc);
+
+        conj.push_back(scc);
         conj.push_back(fit);
       }
       else if (gap_minsize_end > 0)
index a7b7532f10112e04264ead2488546b2ea8fabea9..bfcd640b9a4efc620da5f24f9e658ce49724a43d 100644 (file)
@@ -1710,6 +1710,7 @@ set(regress_2_tests
   regress2/strings/cmu-disagree-0707-dd.smt2
   regress2/strings/cmu-prereg-fmf.smt2
   regress2/strings/cmu-repl-len-nterm.smt2
+  regress2/strings/non_termination_regular_expression6.smt2
   regress2/strings/norn-dis-0707-3.smt2
   regress2/strings/repl-repl.smt2
   regress2/sygus/MPwL_d1s3.sy
index 5236509262270d7de5dd016f33d906ba334b9c8d..fc080f0e0859b94607f149bd171c016c707dd865 100644 (file)
@@ -1703,6 +1703,7 @@ REG2_TESTS = \
        regress2/strings/cmu-disagree-0707-dd.smt2 \
        regress2/strings/cmu-prereg-fmf.smt2 \
        regress2/strings/cmu-repl-len-nterm.smt2 \
+       regress2/strings/non_termination_regular_expression6.smt2 \
        regress2/strings/norn-dis-0707-3.smt2 \
        regress2/strings/repl-repl.smt2 \
        regress2/sygus/MPwL_d1s3.sy \
diff --git a/test/regress/regress2/strings/non_termination_regular_expression6.smt2 b/test/regress/regress2/strings/non_termination_regular_expression6.smt2
new file mode 100644 (file)
index 0000000..340cade
--- /dev/null
@@ -0,0 +1,61 @@
+; COMMAND-LINE: --strings-exp --re-elim
+; EXPECT: unsat
+(set-logic ALL)
+(set-info :status unsat)
+(declare-const actionName String)
+(declare-const actionNamespace String)
+(declare-const resource_account String)
+(declare-const resource_partition String)
+(declare-const resource_prefix String)
+(declare-const resource_region String)
+(declare-const resource_resource String)
+(declare-const resource_service String)
+
+; Action: p0.0
+(declare-const p0.0.action Bool)
+(assert (= p0.0.action (and (= "sqs" actionNamespace) (= "sendmessage" actionName))))
+
+; Resource: p0.0
+(declare-const p0.0.resource Bool)
+(assert (= p0.0.resource (and (= resource_prefix "arn") (= resource_partition "aws") (= resource_service "sqs") (= resource_region "us-east-1") (= resource_account "111144448888") (str.in.re resource_resource (re.++ (str.to.re "ab") (re.* re.allchar) (str.to.re "b") (re.* re.allchar) (str.to.re "b") (re.* re.allchar) (str.to.re "b"))))))
+
+; Statement: p0.0
+(declare-const p0.0.statement.allows Bool)
+(assert (= p0.0.statement.allows (and p0.0.action p0.0.resource)))
+
+; Policy: 0
+(declare-const p0.denies Bool)
+(assert (not p0.denies))
+(declare-const p0.allows Bool)
+(assert (= p0.allows (and (not p0.denies) p0.0.statement.allows)))
+(declare-const p0.neutral Bool)
+(assert (= p0.neutral (and (not p0.allows) (not p0.denies))))
+
+; Action: p1.0
+(declare-const p1.0.action Bool)
+(assert (= p1.0.action (and (= "sqs" actionNamespace) (= "sendmessage" actionName))))
+
+; Resource: p1.0
+(declare-const p1.0.resource Bool)
+(assert (= p1.0.resource (and (= resource_prefix "arn") (= resource_partition "aws") (= resource_service "sqs") (= resource_region "us-east-1") (= resource_account "111144448888") (str.in.re resource_resource (re.++ (str.to.re "a") (re.* re.allchar) (str.to.re "b") (re.* re.allchar) (str.to.re "b") (re.* re.allchar) (str.to.re "b"))))))
+
+; Statement: p1.0
+(declare-const p1.0.statement.allows Bool)
+(assert (= p1.0.statement.allows (and p1.0.action p1.0.resource)))
+
+; Policy: 1
+(declare-const p1.denies Bool)
+(assert (not p1.denies))
+(declare-const p1.allows Bool)
+(assert (= p1.allows (and (not p1.denies) p1.0.statement.allows)))
+(declare-const p1.neutral Bool)
+(assert (= p1.neutral (and (not p1.allows) (not p1.denies))))
+
+; Resource service invariant
+(assert (not (str.contains resource_service ":")))
+(assert (= resource_prefix "arn"))
+
+; Goals
+(assert p0.allows)
+(assert (or p1.denies p1.neutral))
+(check-sat)