Term Solver::mkString(const unsigned char c) const
{
- return d_exprMgr->mkConst(String(c));
+ return d_exprMgr->mkConst(String(std::string(1, c)));
}
Term Solver::mkString(const std::vector<unsigned>& s) const
#include "expr/kind.h"
#include "options/strings_options.h"
+#include "theory/strings/theory_strings_rewriter.h"
namespace CVC4 {
namespace theory {
namespace strings {
RegExpOpr::RegExpOpr()
- : d_lastchar(options::stdPrintASCII() ? '\x7f' : '\xff'),
- d_emptyString(NodeManager::currentNM()->mkConst(::CVC4::String(""))),
+ : d_emptyString(NodeManager::currentNM()->mkConst(::CVC4::String(""))),
d_true(NodeManager::currentNM()->mkConst(true)),
d_false(NodeManager::currentNM()->mkConst(false)),
d_emptySingleton(NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP,
d_zero(NodeManager::currentNM()->mkConst(::CVC4::Rational(0))),
d_one(NodeManager::currentNM()->mkConst(::CVC4::Rational(1))),
RMAXINT(LONG_MAX),
- d_char_start(),
- d_char_end(),
d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA,
std::vector<Node>{})),
d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma))
{
+ d_lastchar = TheoryStringsRewriter::getAlphabetCardinality()-1;
}
RegExpOpr::~RegExpOpr() {}
if(tmp == d_emptyString) {
ret = 2;
} else {
- if(tmp.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
+ if (tmp.getConst<CVC4::String>().front() == c.front())
+ {
retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
} else {
if(tmp.getKind() == kind::STRING_CONCAT) {
Node t2 = tmp[0];
if(t2.isConst()) {
- if(t2.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
+ if (t2.getConst<CVC4::String>().front() == c.front())
+ {
Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
std::vector< Node > vec_nodes;
if(r[0] == d_emptyString) {
retNode = d_emptyRegexp;
} else {
- if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
+ if (r[0].getConst<CVC4::String>().front() == c.front())
+ {
retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
} else {
return retNode;
}
-void RegExpOpr::firstChars( Node r, std::set<unsigned char> &pcset, SetNodes &pvset ) {
+void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
+{
Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl;
- std::map< Node, std::pair< std::set<unsigned char>, SetNodes > >::const_iterator itr = d_fset_cache.find(r);
+ std::map<Node, std::pair<std::set<unsigned>, SetNodes> >::const_iterator itr =
+ d_fset_cache.find(r);
if(itr != d_fset_cache.end()) {
pcset.insert((itr->second).first.begin(), (itr->second).first.end());
pvset.insert((itr->second).second.begin(), (itr->second).second.end());
} else {
- std::set<unsigned char> cset;
+ // cset is code points
+ std::set<unsigned> cset;
SetNodes vset;
int k = r.getKind();
switch( k ) {
break;
}
case kind::REGEXP_SIGMA: {
- for(unsigned char i='\0'; i<=d_lastchar; i++) {
+ Assert(d_lastchar < std::numeric_limits<unsigned>::max());
+ for (unsigned i = 0; i <= d_lastchar; i++)
+ {
cset.insert(i);
}
break;
}
case kind::REGEXP_RANGE: {
- unsigned char a = r[0].getConst<String>().getFirstChar();
- unsigned char b = r[1].getConst<String>().getFirstChar();
- for(unsigned char c=a; c<=b; c++) {
+ unsigned a = r[0].getConst<String>().front();
+ a = String::convertUnsignedIntToCode(a);
+ unsigned b = r[1].getConst<String>().front();
+ b = String::convertUnsignedIntToCode(b);
+ Assert(a < b);
+ Assert(b < std::numeric_limits<unsigned>::max());
+ for (unsigned c = a; c <= b; c++)
+ {
cset.insert(c);
}
break;
if(st.isConst()) {
CVC4::String s = st.getConst< CVC4::String >();
if(s.size() != 0) {
- cset.insert(s.getFirstChar());
+ unsigned sc = s.front();
+ sc = String::convertUnsignedIntToCode(sc);
+ cset.insert(sc);
}
- } else if(st.getKind() == kind::VARIABLE) {
- vset.insert( st );
- } else {
+ }
+ else if (st.getKind() == kind::STRING_CONCAT)
+ {
if(st[0].isConst()) {
- CVC4::String s = st[0].getConst< CVC4::String >();
- cset.insert(s.getFirstChar());
+ CVC4::String s = st[0].getConst<CVC4::String>();
+ unsigned sc = s.front();
+ sc = String::convertUnsignedIntToCode(sc);
+ cset.insert(sc);
} else {
vset.insert( st[0] );
}
}
+ else
+ {
+ vset.insert(st);
+ }
break;
}
case kind::REGEXP_CONCAT: {
}
pcset.insert(cset.begin(), cset.end());
pvset.insert(vset.begin(), vset.end());
- std::pair< std::set<unsigned char>, SetNodes > p(cset, vset);
+ std::pair<std::set<unsigned>, SetNodes> p(cset, vset);
d_fset_cache[r] = p;
}
if(Trace.isOn("regexp-fset")) {
Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {";
- for(std::set<unsigned char>::const_iterator itr = pcset.begin();
- itr != pcset.end(); itr++) {
- if(itr != pcset.begin()) {
- Trace("regexp-fset") << ",";
- }
- Trace("regexp-fset") << (*itr);
+ for (std::set<unsigned>::const_iterator itr = pcset.begin();
+ itr != pcset.end();
+ itr++)
+ {
+ if (itr != pcset.begin())
+ {
+ Trace("regexp-fset") << ",";
+ }
+ Trace("regexp-fset") << (*itr);
}
Trace("regexp-fset") << "}" << std::endl;
}
}
void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
std::pair < Node, Node > p(s, r);
+ NodeManager *nm = NodeManager::currentNM();
std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
if(itr != d_simpl_neg_cache.end()) {
new_nodes.push_back( itr->second );
}
case kind::REGEXP_RANGE: {
std::vector< Node > vec;
- unsigned char a = r[0].getConst<String>().getFirstChar();
- unsigned char b = r[1].getConst<String>().getFirstChar();
- for(unsigned char c=a; c<=b; c++) {
- Node tmp = s.eqNode( NodeManager::currentNM()->mkConst( CVC4::String(c) ) ).negate();
+ unsigned a = r[0].getConst<String>().front();
+ a = String::convertUnsignedIntToCode(a);
+ unsigned b = r[1].getConst<String>().front();
+ b = String::convertUnsignedIntToCode(b);
+ for (unsigned c = a; c <= b; c++)
+ {
+ std::vector<unsigned> tmpVec;
+ tmpVec.push_back(String::convertCodeToUnsignedInt(c));
+ Node tmp = s.eqNode(nm->mkConst(String(tmpVec))).negate();
vec.push_back( tmp );
}
conc = vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::AND, vec);
}
void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
std::pair < Node, Node > p(s, r);
+ NodeManager *nm = NodeManager::currentNM();
std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
if(itr != d_simpl_cache.end()) {
new_nodes.push_back( itr->second );
case kind::REGEXP_RANGE: {
conc = s.eqNode( r[0] );
if(r[0] != r[1]) {
- unsigned char a = r[0].getConst<String>().getFirstChar();
- unsigned char b = r[1].getConst<String>().getFirstChar();
+ unsigned a = r[0].getConst<String>().front();
+ unsigned b = r[1].getConst<String>().front();
a += 1;
- Node tmp = a!=b? NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s,
- NodeManager::currentNM()->mkNode(kind::REGEXP_RANGE,
- NodeManager::currentNM()->mkConst( CVC4::String(a) ),
- r[1])) :
- s.eqNode(r[1]);
+ std::vector<unsigned> anvec;
+ anvec.push_back(a);
+ Node an = nm->mkConst(String(anvec));
+ Node tmp = a != b
+ ? nm->mkNode(kind::STRING_IN_REGEXP,
+ s,
+ nm->mkNode(kind::REGEXP_RANGE, an, r[1]))
+ : s.eqNode(r[1]);
conc = NodeManager::currentNM()->mkNode(kind::OR, conc, tmp);
}
- /*
- unsigned char a = r[0].getConst<String>().getFirstChar();
- unsigned char b = r[1].getConst<String>().getFirstChar();
- std::vector< Node > vec;
- for(unsigned char c=a; c<=b; c++) {
- Node t2 = s.eqNode( NodeManager::currentNM()->mkConst( CVC4::String(c) ));
- vec.push_back( t2 );
- }
- conc = vec.empty()? d_emptySingleton : vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::OR, vec);
- */
break;
}
case kind::STRING_TO_REGEXP: {
rNode = itrcache->second;
} else {
Trace("regexp-int-debug") << " ... normal without cache" << std::endl;
- std::vector< unsigned char > cset;
- std::set< unsigned char > cset1, cset2;
+ std::vector<unsigned> cset;
+ std::set<unsigned> cset1, cset2;
std::set< Node > vset1, vset2;
firstChars(r1, cset1, vset1);
firstChars(r2, cset2, vset2);
}
if(Trace.isOn("regexp-int-debug")) {
Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {";
- for(std::vector<unsigned char>::const_iterator itr = cset.begin();
- itr != cset.end(); itr++) {
+ for (std::vector<unsigned>::const_iterator itr = cset.begin();
+ itr != cset.end();
+ itr++)
+ {
//CVC4::String c( *itr );
if(itr != cset.begin()) {
Trace("regexp-int-debug") << ", ";
Trace("regexp-int-debug") << std::endl;
}
std::map< PairNodes, Node > cacheX;
- for(std::vector<unsigned char>::const_iterator itr = cset.begin();
- itr != cset.end(); itr++) {
- CVC4::String c( *itr );
+ for (std::vector<unsigned>::const_iterator itr = cset.begin();
+ itr != cset.end();
+ itr++)
+ {
+ std::vector<unsigned> cvec;
+ cvec.push_back(String::convertCodeToUnsignedInt(*itr));
+ String c(cvec);
Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl;
Node r1l = derivativeSingle(r1, c);
Node r2l = derivativeSingle(r2, c);
typedef std::set< Node > SetNodes;
typedef std::pair< Node, Node > PairNodes;
-private:
- unsigned char d_lastchar;
+ private:
+ /** the code point of the last character in the alphabet we are using */
+ unsigned d_lastchar;
Node d_emptyString;
Node d_true;
Node d_false;
Node d_one;
CVC4::Rational RMAXINT;
- unsigned char d_char_start;
- unsigned char d_char_end;
Node d_sigma;
Node d_sigma_star;
- std::map< PairNodes, Node > d_simpl_cache;
- std::map< PairNodes, Node > d_simpl_neg_cache;
- std::map< Node, std::pair< int, Node > > d_delta_cache;
- std::map< PairNodeStr, Node > d_dv_cache;
- std::map< PairNodeStr, std::pair< Node, int > > d_deriv_cache;
- std::map< Node, std::pair< Node, int > > d_compl_cache;
- std::map< Node, bool > d_cstre_cache;
- std::map< Node, std::pair< std::set<unsigned char>, std::set<Node> > > d_cset_cache;
- std::map< Node, std::pair< std::set<unsigned char>, std::set<Node> > > d_fset_cache;
- std::map< PairNodes, Node > d_inter_cache;
- std::map< Node, Node > d_rm_inter_cache;
- std::map< Node, bool > d_norv_cache;
- std::map< Node, std::vector< PairNodes > > d_split_cache;
- //bool checkStarPlus( Node t );
- void simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes );
- void simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes );
- std::string niceChar( Node r );
- Node mkAllExceptOne( unsigned char c );
- bool isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2);
+ std::map<PairNodes, Node> d_simpl_cache;
+ std::map<PairNodes, Node> d_simpl_neg_cache;
+ std::map<Node, std::pair<int, Node> > d_delta_cache;
+ std::map<PairNodeStr, Node> d_dv_cache;
+ std::map<PairNodeStr, std::pair<Node, int> > d_deriv_cache;
+ std::map<Node, std::pair<Node, int> > d_compl_cache;
+ std::map<Node, bool> d_cstre_cache;
+ std::map<Node, std::pair<std::set<unsigned>, std::set<Node> > > d_cset_cache;
+ std::map<Node, std::pair<std::set<unsigned>, std::set<Node> > > d_fset_cache;
+ std::map<PairNodes, Node> d_inter_cache;
+ std::map<Node, Node> d_rm_inter_cache;
+ std::map<Node, bool> d_norv_cache;
+ std::map<Node, std::vector<PairNodes> > d_split_cache;
+ void simplifyPRegExp(Node s, Node r, std::vector<Node> &new_nodes);
+ void simplifyNRegExp(Node s, Node r, std::vector<Node> &new_nodes);
+ std::string niceChar(Node r);
+ Node mkAllExceptOne(unsigned c);
+ bool isPairNodesInSet(std::set<PairNodes> &s, Node n1, Node n2);
bool containC2(unsigned cnt, Node n);
Node convert1(unsigned cnt, Node n);
void convert2(unsigned cnt, Node n, Node &r1, Node &r2);
bool testNoRV(Node r);
- Node intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt );
+ Node intersectInternal(Node r1,
+ Node r2,
+ std::map<PairNodes, Node> cache,
+ unsigned cnt);
Node removeIntersection(Node r);
- void firstChars( Node r, std::set<unsigned char> &pcset, SetNodes &pvset );
-public:
+ void firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset);
+
+ public:
RegExpOpr();
~RegExpOpr();
d_true = NodeManager::currentNM()->mkConst( true );
d_false = NodeManager::currentNM()->mkConst( false );
- d_card_size = 256;
- if (options::stdPrintASCII())
- {
- d_card_size = 128;
- }
+ d_card_size = TheoryStringsRewriter::getAlphabetCardinality();
}
TheoryStrings::~TheoryStrings() {
throw LogicException(ss.str());
}
if( tn.isString() ) {
+ // all characters of constants should fall in the alphabet
+ if (n.isConst())
+ {
+ std::vector<unsigned> vec = n.getConst<String>().getVec();
+ for (unsigned u : vec)
+ {
+ if (u >= d_card_size)
+ {
+ std::stringstream ss;
+ ss << "Characters in string \"" << n
+ << "\" are outside of the given alphabet.";
+ throw LogicException(ss.str());
+ }
+ }
+ }
// if finite model finding is enabled,
// then we minimize the length of this term if it is a variable
// but not an internally generated Skolem, or a term that does
else if (rc.getKind() == kind::REGEXP_RANGE
|| rc.getKind() == kind::REGEXP_SIGMA)
{
- CVC4::String ss( t==0 ? s.getLastChar() : s.getFirstChar() );
+ std::vector<unsigned> ssVec;
+ ssVec.push_back(t == 0 ? s.back() : s.front());
+ CVC4::String ss(ssVec);
if( testConstStringInRegExp( ss, 0, rc ) ){
//strip off one character
mchildren.pop_back();
return Node::null();
}
+unsigned TheoryStringsRewriter::getAlphabetCardinality()
+{
+ if (options::stdPrintASCII())
+ {
+ Assert(128 <= String::num_codes());
+ return 128;
+ }
+ Assert(256 <= String::num_codes());
+ return 256;
+}
+
Node TheoryStringsRewriter::rewriteEquality(Node node)
{
Assert(node.getKind() == kind::EQUAL);
}
case kind::REGEXP_RANGE: {
if(s.size() == index_start + 1) {
- unsigned char a = r[0].getConst<String>().getFirstChar();
- unsigned char b = r[1].getConst<String>().getFirstChar();
- unsigned char c = s.getLastChar();
+ unsigned a = r[0].getConst<String>().front();
+ a = String::convertUnsignedIntToCode(a);
+ unsigned b = r[1].getConst<String>().front();
+ b = String::convertUnsignedIntToCode(b);
+ unsigned c = s.back();
+ c = String::convertUnsignedIntToCode(c);
return (a <= c && c <= b);
} else {
return false;
static inline void init() {}
static inline void shutdown() {}
+ /** get the cardinality of the alphabet used, based on the options */
+ static unsigned getAlphabetCardinality();
/** rewrite equality
*
* This method returns a formula that is equivalent to the equality between
{
if( check ) {
TNode::iterator it = n.begin();
- unsigned char ch[2];
+ unsigned ch[2];
for(int i=0; i<2; ++i) {
TypeNode t = (*it).getType(check);
if( (*it).getConst<String>().size() != 1 ) {
throw TypeCheckingExceptionPrivate(n, "expecting a single constant string term in regexp range");
}
- ch[i] = (*it).getConst<String>().getFirstChar();
+ unsigned ci = (*it).getConst<String>().front();
+ ch[i] = String::convertUnsignedIntToCode(ci);
++it;
}
if(ch[0] > ch[1]) {
throw TypeCheckingExceptionPrivate(n, "expecting the first constant is less or equal to the second one in regexp range");
}
- if (options::stdPrintASCII() && ch[1] > '\x7f')
+ unsigned maxCh = options::stdPrintASCII() ? 127 : 255;
+ if (ch[1] > maxCh)
{
- throw TypeCheckingExceptionPrivate(n,
- "expecting standard ASCII "
- "characters in regexp range when "
- "strings-print-ascii is true");
+ std::stringstream ss;
+ ss << "expecting characters whose code point is less than or equal to "
+ << maxCh;
+ throw TypeCheckingExceptionPrivate(n, ss.str());
}
}
return nodeManager->regExpType();
#include <sstream>
-#include "util/regexp.h"
-#include "theory/type_enumerator.h"
-#include "expr/type_node.h"
#include "expr/kind.h"
+#include "expr/type_node.h"
+#include "theory/strings/theory_strings_rewriter.h"
+#include "theory/type_enumerator.h"
+#include "util/regexp.h"
namespace CVC4 {
namespace theory {
{
Assert(type.getKind() == kind::TYPE_CONSTANT &&
type.getConst<TypeConstant>() == STRING_TYPE);
- d_cardinality = 256;
+ d_cardinality = TheoryStringsRewriter::getAlphabetCardinality();
mkCurr();
}
Node operator*() override { return d_curr; }
return (i + start_code()) % num_codes();
}
+String::String(const std::vector<unsigned> &s) : d_str(s)
+{
+#ifdef CVC4_ASSERTIONS
+ for (unsigned u : d_str)
+ {
+ Assert(convertUnsignedIntToCode(u) < num_codes());
+ }
+#endif
+}
+
int String::cmp(const String &y) const {
if (size() != y.size()) {
return size() < y.size() ? -1 : 1;
}
for (unsigned int i = 0; i < size(); ++i) {
if (d_str[i] != y.d_str[i]) {
- return getUnsignedCharAt(i) < y.getUnsignedCharAt(i) ? -1 : 1;
+ unsigned cp = convertUnsignedIntToCode(d_str[i]);
+ unsigned cpy = convertUnsignedIntToCode(y.d_str[i]);
+ return cp < cpy ? -1 : 1;
}
}
return 0;
i++;
}
}
+#ifdef CVC4_ASSERTIONS
+ for (unsigned u : str)
+ {
+ Assert(convertUnsignedIntToCode(u) < num_codes());
+ }
+#endif
return str;
}
-unsigned char String::getUnsignedCharAt(size_t pos) const {
- Assert(pos < size());
- return convertUnsignedIntToChar(d_str[pos]);
+unsigned String::front() const
+{
+ Assert(!d_str.empty());
+ return d_str.front();
+}
+
+unsigned String::back() const
+{
+ Assert(!d_str.empty());
+ return d_str.back();
}
std::size_t String::overlap(const String &y) const {
* This is the cardinality of the alphabet that is representable by this
* class. Notice that this must be greater than or equal to the cardinality
* of the alphabet that the string theory reasons about.
+ *
+ * This must be strictly less than std::numeric_limits<unsigned>::max().
*/
static inline unsigned num_codes() { return 256; }
/**
: d_str(toInternal(s, useEscSequences)) {}
explicit String(const char* s, bool useEscSequences = false)
: d_str(toInternal(std::string(s), useEscSequences)) {}
- explicit String(const unsigned char c)
- : d_str({convertCharToUnsignedInt(c)}) {}
- explicit String(const std::vector<unsigned>& s) : d_str(s) {}
+ explicit String(const std::vector<unsigned>& s);
String& operator=(const String& y) {
if (this != &y) {
/** Return the length of the string */
std::size_t size() const { return d_str.size(); }
- unsigned char getFirstChar() const { return getUnsignedCharAt(0); }
- unsigned char getLastChar() const { return getUnsignedCharAt(size() - 1); }
-
bool isRepeated() const;
bool tailcmp(const String& y, int& c) const;
bool isNumber() const;
/** Returns the corresponding rational for the text of this string. */
Rational toNumber() const;
-
+ /** get the internal unsigned representation of this string */
const std::vector<unsigned>& getVec() const { return d_str; }
+ /** get the internal unsigned value of the first character in this string */
+ unsigned front() const;
+ /** get the internal unsigned value of the last character in this string */
+ unsigned back() const;
/** is the unsigned a digit?
* The input should be the same type as the element type of d_str
*/
static std::vector<unsigned> toInternal(const std::string& s,
bool useEscSequences = true);
- unsigned char getUnsignedCharAt(size_t pos) const;
/**
* Returns a negative number if *this < y, 0 if *this and y are equal and a
regress1/strings/norn-ab.smt2 \
regress1/strings/norn-nel-bug-052116.smt2 \
regress1/strings/norn-simp-rew-sat.smt2 \
+ regress1/strings/nterm-re-inter-sigma.smt2 \
regress1/strings/pierre150331.smt2 \
regress1/strings/re-unsound-080718.smt2 \
regress1/strings/regexp001.smt2 \
--- /dev/null
+(set-info :smt-lib-version 2.5)
+(set-logic ALL)
+(set-info :status sat)
+(set-option :strings-exp true)
+(declare-fun x () String)
+
+(assert
+(not (= (str.in.re x (re.++ (re.++ re.allchar re.allchar ) (re.* re.allchar ))) (not (str.in.re x (re.* (str.to.re "A")))))
+))
+
+(check-sat)