From cac85606876d4f0be1c6c54172f7509ce54cdcb5 Mon Sep 17 00:00:00 2001 From: Tianyi Liang Date: Thu, 26 Dec 2013 17:18:26 -0600 Subject: [PATCH] new functions in strings --- src/parser/smt2/Smt2.g | 8 ++- src/theory/strings/kinds | 7 +-- .../strings/theory_strings_preprocess.cpp | 52 ++++++++++++++++++ .../strings/theory_strings_rewriter.cpp | 40 +++++++++----- .../strings/theory_strings_type_rules.h | 54 +++++++++++++++++-- src/util/regexp.h | 19 +++++++ test/regress/regress0/strings/substr001.smt2 | 4 +- 7 files changed, 162 insertions(+), 22 deletions(-) diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g index 13850aba6..b1e37f85e 100644 --- a/src/parser/smt2/Smt2.g +++ b/src/parser/smt2/Smt2.g @@ -1252,6 +1252,8 @@ builtinOp[CVC4::Kind& kind] | STRCON_TOK { $kind = CVC4::kind::STRING_CONCAT; } | STRLEN_TOK { $kind = CVC4::kind::STRING_LENGTH; } | STRSUB_TOK { $kind = CVC4::kind::STRING_SUBSTR; } +// | STRCTN_TOK { $kind = CVC4::kind::STRING_STRCTN; } +// | STRCAT_TOK { $kind = CVC4::kind::STRING_CHARAT; } | STRINRE_TOK { $kind = CVC4::kind::STRING_IN_REGEXP; } | STRTORE_TOK { $kind = CVC4::kind::STRING_TO_REGEXP; } | RECON_TOK { $kind = CVC4::kind::REGEXP_CONCAT; } @@ -1625,7 +1627,11 @@ INT2BV_TOK : 'int2bv'; //STRCST_TOK : 'str.cst'; STRCON_TOK : 'str.++'; STRLEN_TOK : 'str.len'; -STRSUB_TOK : 'str.sub' ; +STRSUB_TOK : 'str.substr' ; +STRCTN_TOK : 'str.contain' ; +STRCAT_TOK : 'str.at' ; +//STRIDOF_TOK : 'str.indexof' ; +//STRREPL_TOK : 'str.repalce' ; STRINRE_TOK : 'str.in.re'; STRTORE_TOK : 'str.to.re'; RECON_TOK : 're.++'; diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 20db916c9..a421d6fa8 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -11,12 +11,11 @@ typechecker "theory/strings/theory_strings_type_rules.h" operator STRING_CONCAT 2: "string concat" - operator STRING_IN_REGEXP 2 "membership" - operator STRING_LENGTH 1 "string length" - operator STRING_SUBSTR 3 "string substr" +operator STRING_STRCTN 2 "string contains" +operator STRING_CHARAT 2 "string char at" #sort CHAR_TYPE \ # Cardinality::INTEGERS \ @@ -104,6 +103,8 @@ typerule STRING_TO_REGEXP ::CVC4::theory::strings::StringToRegExpTypeRule typerule STRING_CONCAT ::CVC4::theory::strings::StringConcatTypeRule typerule STRING_LENGTH ::CVC4::theory::strings::StringLengthTypeRule typerule STRING_SUBSTR ::CVC4::theory::strings::StringSubstrTypeRule +typerule STRING_STRCTN ::CVC4::theory::strings::StringContainTypeRule +typerule STRING_CHARAT ::CVC4::theory::strings::StringCharAtTypeRule typerule STRING_IN_REGEXP ::CVC4::theory::strings::StringInRegExpTypeRule diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index ff01b1887..355b182c9 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -136,6 +136,58 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { } else { throw LogicException("substring not supported in this release"); } + } else if( t.getKind() == kind::STRING_STRCTN ){ + if(options::stringExp()) { + Node w = simplify( t[0], new_nodes ); + Node y = simplify( t[1], new_nodes ); + Node emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); + std::vector< Node > or_vec; + or_vec.push_back( w.eqNode(y) ); + Node x1 = NodeManager::currentNM()->mkBoundVar( t[0].getType() ); + Node c1 = NodeManager::currentNM()->mkNode( kind::EXISTS, NodeManager::currentNM()->mkNode( kind::AND, + x1.eqNode( emptyString ).negate(), + w.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, x1, y ) ))); + or_vec.push_back( c1 ); + Node z2 = NodeManager::currentNM()->mkBoundVar( t[0].getType() ); + Node c2 = NodeManager::currentNM()->mkNode( kind::EXISTS, NodeManager::currentNM()->mkNode( kind::AND, + z2.eqNode( emptyString ).negate(), + w.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, y, z2 ) ))); + or_vec.push_back( c2 ); + Node x3 = NodeManager::currentNM()->mkBoundVar( t[0].getType() ); + Node z3 = NodeManager::currentNM()->mkBoundVar( t[0].getType() ); + Node c3 = NodeManager::currentNM()->mkNode( kind::EXISTS, NodeManager::currentNM()->mkNode( kind::AND, + x3.eqNode( emptyString ).negate(), z3.eqNode( emptyString ).negate(), + w.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, x3, y, z3 ) ))); + or_vec.push_back( c3 ); + + Node cc = NodeManager::currentNM()->mkNode( kind::OR, or_vec ); + + d_cache[t] = cc; + retNode = cc; + } else { + throw LogicException("string contain not supported in this release"); + } + } else if( t.getKind() == kind::STRING_CHARAT ){ + if(options::stringExp()) { + Node sk1 = NodeManager::currentNM()->mkSkolem( "ca1sym_$$", t.getType(), "created for charat" ); + Node sk2 = NodeManager::currentNM()->mkSkolem( "ca2sym_$$", t.getType(), "created for charat" ); + Node sk3 = NodeManager::currentNM()->mkSkolem( "ca3sym_$$", t.getType(), "created for charat" ); + Node x = simplify( t[0], new_nodes ); + Node x_eq_123 = NodeManager::currentNM()->mkNode( kind::EQUAL, + NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, sk2, sk3 ), x ); + new_nodes.push_back( x_eq_123 ); + Node len_sk1_eq_i = NodeManager::currentNM()->mkNode( kind::EQUAL, t[1], + NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ) ); + new_nodes.push_back( len_sk1_eq_i ); + Node len_sk2_eq_1 = NodeManager::currentNM()->mkNode( kind::EQUAL, NodeManager::currentNM()->mkConst( Rational( 1 ) ), + NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk2 ) ); + new_nodes.push_back( len_sk2_eq_1 ); + + d_cache[t] = sk2; + retNode = sk2; + } else { + throw LogicException("string char at not supported in this release"); + } } else if( t.getNumChildren()>0 ){ std::vector< Node > cc; if (t.getMetaKind() == kind::metakind::PARAMETERIZED) { diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 0e37367bf..af19095a0 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -341,21 +341,35 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } } } else if(node.getKind() == kind::STRING_SUBSTR) { - if(options::stringExp()) { - if( node[0].isConst() && node[1].isConst() && node[2].isConst() ) { - int i = node[1].getConst().getNumerator().toUnsignedInt(); - int j = node[2].getConst().getNumerator().toUnsignedInt(); - if( node[0].getConst().size() >= (unsigned) (i + j) ) { - retNode = NodeManager::currentNM()->mkConst( node[0].getConst().substr(i, j) ); - } else { - // TODO: some issues, must be guarded by users - retNode = NodeManager::currentNM()->mkConst( false ); - } + if( node[0].isConst() && node[1].isConst() && node[2].isConst() ) { + int i = node[1].getConst().getNumerator().toUnsignedInt(); + int j = node[2].getConst().getNumerator().toUnsignedInt(); + if( node[0].getConst().size() >= (unsigned) (i + j) ) { + retNode = NodeManager::currentNM()->mkConst( node[0].getConst().substr(i, j) ); } else { - //handled by preprocess + // TODO: some issues, must be guarded by users + retNode = NodeManager::currentNM()->mkConst( false ); + } + } + } else if(node.getKind() == kind::STRING_STRCTN) { + if( node[0].isConst() && node[1].isConst() ) { + CVC4::String s = node[0].getConst(); + CVC4::String t = node[1].getConst(); + if( s.find(t) != std::string::npos ) { + retNode = NodeManager::currentNM()->mkConst( true ); + } else { + retNode = NodeManager::currentNM()->mkConst( false ); + } + } + } else if(node.getKind() == kind::STRING_CHARAT) { + if( node[0].isConst() && node[1].isConst() ) { + int i = node[1].getConst().getNumerator().toUnsignedInt(); + if( node[0].getConst().size() > (unsigned) i ) { + retNode = NodeManager::currentNM()->mkConst( node[0].getConst().substr(i, 1) ); + } else { + // TODO: some issues, must be guarded by users + retNode = NodeManager::currentNM()->mkConst( false ); } - } else { - throw LogicException("substring not supported in this release"); } } else if(node.getKind() == kind::STRING_IN_REGEXP) { retNode = rewriteMembership(node); diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h index d5019ab39..df9d29f0f 100644 --- a/src/theory/strings/theory_strings_type_rules.h +++ b/src/theory/strings/theory_strings_type_rules.h @@ -57,6 +57,9 @@ public: inline static TypeNode computeType(NodeManager* nodeManager, TNode n, bool check) throw (TypeCheckingExceptionPrivate, AssertionException) { if( check ){ + if(n.getNumChildren() != 1) { + throw TypeCheckingExceptionPrivate(n, "expecting 1 term in string length"); + } TypeNode t = n[0].getType(check); if (!t.isString()) { throw TypeCheckingExceptionPrivate(n, "expecting string terms in string length"); @@ -71,17 +74,62 @@ public: inline static TypeNode computeType(NodeManager* nodeManager, TNode n, bool check) throw (TypeCheckingExceptionPrivate, AssertionException) { if( check ){ + if(n.getNumChildren() != 3) { + throw TypeCheckingExceptionPrivate(n, "expecting 3 terms in substr"); + } TypeNode t = n[0].getType(check); if (!t.isString()) { - throw TypeCheckingExceptionPrivate(n, "expecting string terms in substr"); + throw TypeCheckingExceptionPrivate(n, "expecting a string term in substr"); } t = n[1].getType(check); if (!t.isInteger()) { - throw TypeCheckingExceptionPrivate(n, "expecting start int terms in substr"); + throw TypeCheckingExceptionPrivate(n, "expecting a start int term in substr"); } t = n[2].getType(check); if (!t.isInteger()) { - throw TypeCheckingExceptionPrivate(n, "expecting length int terms in substr"); + throw TypeCheckingExceptionPrivate(n, "expecting a length int term in substr"); + } + } + return nodeManager->stringType(); + } +}; + +class StringContainTypeRule { +public: + inline static TypeNode computeType(NodeManager* nodeManager, TNode n, bool check) + throw (TypeCheckingExceptionPrivate, AssertionException) { + if( check ){ + if(n.getNumChildren() != 2) { + throw TypeCheckingExceptionPrivate(n, "expecting 2 terms in string contain"); + } + TypeNode t = n[0].getType(check); + if (!t.isString()) { + throw TypeCheckingExceptionPrivate(n, "expecting an orginal string term in string contain"); + } + t = n[1].getType(check); + if (!t.isString()) { + throw TypeCheckingExceptionPrivate(n, "expecting a target string term in string contain"); + } + } + return nodeManager->stringType(); + } +}; + +class StringCharAtTypeRule { +public: + inline static TypeNode computeType(NodeManager* nodeManager, TNode n, bool check) + throw (TypeCheckingExceptionPrivate, AssertionException) { + if( check ){ + if(n.getNumChildren() != 2) { + throw TypeCheckingExceptionPrivate(n, "expecting 2 terms in string char at"); + } + TypeNode t = n[0].getType(check); + if (!t.isString()) { + throw TypeCheckingExceptionPrivate(n, "expecting a string term in string char at"); + } + t = n[1].getType(check); + if (!t.isInteger()) { + throw TypeCheckingExceptionPrivate(n, "expecting an integer string term in string char at"); } } return nodeManager->stringType(); diff --git a/src/util/regexp.h b/src/util/regexp.h index 3a8fc7170..3f9df6aaf 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -183,6 +183,25 @@ public: return true; } + std::size_t find(const String &y) const { + if(y.d_str.size() == 0) return 0; + if(d_str.size() == 0) return std::string::npos; + std::size_t ret = std::string::npos; + for(int i = 0; i <= (int) d_str.size() - (int) y.d_str.size(); i++) { + if(d_str[i] == y.d_str[0]) { + std::size_t j=0; + for(; j ret_vec; std::vector::const_iterator itr = d_str.begin() + i; diff --git a/test/regress/regress0/strings/substr001.smt2 b/test/regress/regress0/strings/substr001.smt2 index 2b2ae9820..c0554c481 100644 --- a/test/regress/regress0/strings/substr001.smt2 +++ b/test/regress/regress0/strings/substr001.smt2 @@ -8,8 +8,8 @@ (declare-fun i3 () Int) (declare-fun i4 () Int) -(assert (= "efg" (str.sub x i1 i2) ) ) -(assert (= "bef" (str.sub x i3 i4) ) ) +(assert (= "efg" (str.substr x i1 i2) ) ) +(assert (= "bef" (str.substr x i3 i4) ) ) (assert (> (str.len x) 5)) (check-sat) -- 2.30.2