From 7aebe3a327f1075f1489384b7e4e2808250ae344 Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Tue, 8 Oct 2019 10:00:48 -0700 Subject: [PATCH] [CVC Parser] Add support for regular expressions (#3346) --- src/parser/cvc/Cvc.g | 75 ++++++++++++++----- test/regress/CMakeLists.txt | 1 + .../regress0/strings/regexp-native-simple.cvc | 13 ++++ 3 files changed, 69 insertions(+), 20 deletions(-) create mode 100644 test/regress/regress0/strings/regexp-native-simple.cvc diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g index 038c1f1d0..94bb87fdb 100644 --- a/src/parser/cvc/Cvc.g +++ b/src/parser/cvc/Cvc.g @@ -213,7 +213,6 @@ tokens { JOIN_IMAGE_TOK = 'JOIN_IMAGE'; // Strings - STRING_TOK = 'STRING'; STRING_CONCAT_TOK = 'CONCAT'; STRING_LENGTH_TOK = 'LENGTH'; @@ -227,19 +226,17 @@ tokens { STRING_SUFFIXOF_TOK = 'SUFFIXOF'; STRING_STOI_TOK = 'STRING_TO_INTEGER'; STRING_ITOS_TOK = 'INTEGER_TO_STRING'; - //Regular expressions (TODO) - //STRING_IN_REGEXP_TOK - //STRING_TO_REGEXP_TOK - //REGEXP_CONCAT_TOK - //REGEXP_UNION_TOK - //REGEXP_INTER_TOK - //REGEXP_STAR_TOK - //REGEXP_PLUS_TOK - //REGEXP_OPT_TOK - //REGEXP_RANGE_TOK - //REGEXP_LOOP_TOK - //REGEXP_EMPTY_TOK - //REGEXP_SIGMA_TOK + STRING_TO_REGEXP_TOK = 'STRING_TO_REGEXP'; + REGEXP_CONCAT_TOK = 'RE_CONCAT'; + REGEXP_UNION_TOK = 'RE_UNION'; + REGEXP_INTER_TOK = 'RE_INTER'; + REGEXP_STAR_TOK = 'RE_STAR'; + REGEXP_PLUS_TOK = 'RE_PLUS'; + REGEXP_OPT_TOK = 'RE_OPT'; + REGEXP_RANGE_TOK = 'RE_RANGE'; + REGEXP_LOOP_TOK = 'RE_LOOP'; + REGEXP_EMPTY_TOK = 'RE_EMPTY'; + REGEXP_SIGMA_TOK = 'RE_SIGMA'; SETS_CARD_TOK = 'CARD'; @@ -442,13 +439,26 @@ Expr createPrecedenceTree(Parser* parser, ExprManager* em, Expr lhs = createPrecedenceTree(parser, em, expressions, operators, startIndex, pivot); Expr rhs = createPrecedenceTree(parser, em, expressions, operators, pivot + 1, stopIndex); - switch(k) { - case kind::LEQ : if(lhs.getType().isSet()) { k = kind::SUBSET; } break; - case kind::MINUS : if(lhs.getType().isSet()) { k = kind::SETMINUS; } break; - case kind::BITVECTOR_AND: if(lhs.getType().isSet()) { k = kind::INTERSECTION; } break; - case kind::BITVECTOR_OR : if(lhs.getType().isSet()) { k = kind::UNION; } break; - default: break; + if (lhs.getType().isSet()) + { + switch (k) + { + case kind::LEQ: k = kind::SUBSET; break; + case kind::MINUS: k = kind::SETMINUS; break; + case kind::BITVECTOR_AND: k = kind::INTERSECTION; break; + case kind::BITVECTOR_OR: k = kind::UNION; break; + default: break; + } + } + else if (lhs.getType().isString()) + { + switch (k) + { + case kind::MEMBER: k = kind::STRING_IN_REGEXP; break; + default: break; + } } + Expr e = em->mkExpr(k, lhs, rhs); return negate ? em->mkExpr(kind::NOT, e) : e; }/* createPrecedenceTree() recursive variant */ @@ -2026,6 +2036,31 @@ stringTerm[CVC4::Expr& f] { f = MK_EXPR(CVC4::kind::STRING_STOI, f); } | STRING_ITOS_TOK LPAREN formula[f] RPAREN { f = MK_EXPR(CVC4::kind::STRING_ITOS, f); } + | STRING_TO_REGEXP_TOK LPAREN formula[f] RPAREN + { f = MK_EXPR(CVC4::kind::STRING_TO_REGEXP, f); } + | REGEXP_CONCAT_TOK LPAREN formula[f] { args.push_back(f); } + ( COMMA formula[f2] { args.push_back(f2); } )+ RPAREN + { f = MK_EXPR(CVC4::kind::REGEXP_CONCAT, args); } + | REGEXP_UNION_TOK LPAREN formula[f] { args.push_back(f); } + ( COMMA formula[f2] { args.push_back(f2); } )+ RPAREN + { f = MK_EXPR(CVC4::kind::REGEXP_UNION, args); } + | REGEXP_INTER_TOK LPAREN formula[f] { args.push_back(f); } + ( COMMA formula[f2] { args.push_back(f2); } )+ RPAREN + { f = MK_EXPR(CVC4::kind::REGEXP_INTER, args); } + | REGEXP_STAR_TOK LPAREN formula[f] RPAREN + { f = MK_EXPR(CVC4::kind::REGEXP_STAR, f); } + | REGEXP_PLUS_TOK LPAREN formula[f] RPAREN + { f = MK_EXPR(CVC4::kind::REGEXP_PLUS, f); } + | REGEXP_OPT_TOK LPAREN formula[f] RPAREN + { f = MK_EXPR(CVC4::kind::REGEXP_OPT, f); } + | REGEXP_RANGE_TOK LPAREN formula[f] COMMA formula[f2] RPAREN + { f = MK_EXPR(CVC4::kind::REGEXP_RANGE, f, f2); } + | REGEXP_LOOP_TOK LPAREN formula[f] COMMA formula[f2] COMMA formula[f3] RPAREN + { f = MK_EXPR(CVC4::kind::REGEXP_LOOP, f, f2, f3); } + | REGEXP_EMPTY_TOK + { f = MK_EXPR(CVC4::kind::REGEXP_EMPTY, std::vector()); } + | REGEXP_SIGMA_TOK + { f = MK_EXPR(CVC4::kind::REGEXP_SIGMA, std::vector()); } /* string literal */ | str[s] diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 194dbd663..4b7e9d7ce 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -869,6 +869,7 @@ set(regress_0_tests regress0/strings/norn-31.smt2 regress0/strings/norn-simp-rew.smt2 regress0/strings/re.all.smt2 + regress0/strings/regexp-native-simple.cvc regress0/strings/regexp_inclusion.smt2 regress0/strings/regexp_inclusion_reduction.smt2 regress0/strings/repl-rewrites2.smt2 diff --git a/test/regress/regress0/strings/regexp-native-simple.cvc b/test/regress/regress0/strings/regexp-native-simple.cvc new file mode 100644 index 000000000..49d6f3d64 --- /dev/null +++ b/test/regress/regress0/strings/regexp-native-simple.cvc @@ -0,0 +1,13 @@ +% EXPECT: sat + +x : STRING; + +ASSERT x IS_IN RE_CONCAT(RE_OPT(RE_STAR(RE_UNION(RE_RANGE("i", "j"), RE_RANGE("k", "l")))), + RE_PLUS(STRING_TO_REGEXP("abc")), + RE_LOOP(STRING_TO_REGEXP("def"), 1, 2), + RE_SIGMA); +ASSERT NOT(x IS_IN RE_INTER(RE_STAR(RE_SIGMA), RE_EMPTY)); + +ASSERT x = "ikljabcabcdefe"; + +CHECKSAT; -- 2.30.2