src/theory/strings/strings_entail.cpp

   1 /*********************                                                        */
   2 /*! \file strings_entail.cpp
   3  ** \verbatim
   4  ** Top contributors (to current version):
   5  **   Andrew Reynolds, Andres Noetzli
   6  ** This file is part of the CVC4 project.
   7  ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
   8  ** in the top-level source directory) and their institutional affiliations.
   9  ** All rights reserved.  See the file COPYING in the top-level source
  10  ** directory for licensing information.\endverbatim
  11  **
  12  ** \brief Implementation of entailment tests involving strings.
  13  **/
  14
  15 #include "theory/strings/strings_entail.h"
  16
  17 #include "expr/node_builder.h"
  18 #include "theory/rewriter.h"
  19 #include "theory/strings/arith_entail.h"
  20 #include "theory/strings/sequences_rewriter.h"
  21 #include "theory/strings/theory_strings_utils.h"
  22 #include "theory/strings/word.h"
  23
  24 using namespace CVC4::kind;
  25
  26 namespace CVC4 {
  27 namespace theory {
  28 namespace strings {
  29
  30 StringsEntail::StringsEntail(SequencesRewriter& rewriter) : d_rewriter(rewriter)
  31 {
  32 }
  33
  34 bool StringsEntail::canConstantContainConcat(Node c,
  35                                              Node n,
  36                                              int& firstc,
  37                                              int& lastc)
  38 {
  39   Assert(c.isConst());
  40   CVC4::String t = c.getConst<String>();
  41   const std::vector<unsigned>& tvec = t.getVec();
  42   Assert(n.getKind() == STRING_CONCAT);
  43   // must find constant components in order
  44   size_t pos = 0;
  45   firstc = -1;
  46   lastc = -1;
  47   for (unsigned i = 0; i < n.getNumChildren(); i++)
  48   {
  49     if (n[i].isConst())
  50     {
  51       firstc = firstc == -1 ? i : firstc;
  52       lastc = i;
  53       CVC4::String s = n[i].getConst<String>();
  54       size_t new_pos = t.find(s, pos);
  55       if (new_pos == std::string::npos)
  56       {
  57         return false;
  58       }
  59       else
  60       {
  61         pos = new_pos + s.size();
  62       }
  63     }
  64     else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
  65     {
  66       // find the first occurrence of a digit starting at pos
  67       while (pos < tvec.size() && !String::isDigit(tvec[pos]))
  68       {
  69         pos++;
  70       }
  71       if (pos == tvec.size())
  72       {
  73         return false;
  74       }
  75       // must consume at least one digit here
  76       pos++;
  77     }
  78   }
  79   return true;
  80 }
  81
  82 bool StringsEntail::canConstantContainList(Node c,
  83                                            std::vector<Node>& l,
  84                                            int& firstc,
  85                                            int& lastc)
  86 {
  87   Assert(c.isConst());
  88   // must find constant components in order
  89   size_t pos = 0;
  90   firstc = -1;
  91   lastc = -1;
  92   for (unsigned i = 0; i < l.size(); i++)
  93   {
  94     if (l[i].isConst())
  95     {
  96       firstc = firstc == -1 ? i : firstc;
  97       lastc = i;
  98       size_t new_pos = Word::find(c, l[i], pos);
  99       if (new_pos == std::string::npos)
 100       {
 101         return false;
 102       }
 103       else
 104       {
 105         pos = new_pos + Word::getLength(l[i]);
 106       }
 107     }
 108   }
 109   return true;
 110 }
 111
 112 bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
 113                                         std::vector<Node>& nr,
 114                                         int dir,
 115                                         Node& curr)
 116 {
 117   Assert(dir == 1 || dir == -1);
 118   Assert(nr.empty());
 119   Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
 120   bool ret = false;
 121   bool success;
 122   unsigned sindex = 0;
 123   do
 124   {
 125     Assert(!curr.isNull());
 126     success = false;
 127     if (curr != zero && sindex < n1.size())
 128     {
 129       unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
 130       if (n1[sindex_use].isConst())
 131       {
 132         // could strip part of a constant
 133         Node lowerBound =
 134             ArithEntail::getConstantBound(Rewriter::rewrite(curr));
 135         if (!lowerBound.isNull())
 136         {
 137           Assert(lowerBound.isConst());
 138           Rational lbr = lowerBound.getConst<Rational>();
 139           if (lbr.sgn() > 0)
 140           {
 141             Assert(ArithEntail::check(curr, true));
 142             CVC4::String s = n1[sindex_use].getConst<String>();
 143             Node ncl =
 144                 NodeManager::currentNM()->mkConst(CVC4::Rational(s.size()));
 145             Node next_s =
 146                 NodeManager::currentNM()->mkNode(MINUS, lowerBound, ncl);
 147             next_s = Rewriter::rewrite(next_s);
 148             Assert(next_s.isConst());
 149             // we can remove the entire constant
 150             if (next_s.getConst<Rational>().sgn() >= 0)
 151             {
 152               curr = Rewriter::rewrite(
 153                   NodeManager::currentNM()->mkNode(MINUS, curr, ncl));
 154               success = true;
 155               sindex++;
 156             }
 157             else
 158             {
 159               // we can remove part of the constant
 160               // lower bound minus the length of a concrete string is negative,
 161               // hence lowerBound cannot be larger than long max
 162               Assert(lbr < Rational(String::maxSize()));
 163               curr = Rewriter::rewrite(
 164                   NodeManager::currentNM()->mkNode(MINUS, curr, lowerBound));
 165               uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
 166               Assert(lbsize < s.size());
 167               if (dir == 1)
 168               {
 169                 // strip partially from the front
 170                 nr.push_back(
 171                     NodeManager::currentNM()->mkConst(s.prefix(lbsize)));
 172                 n1[sindex_use] = NodeManager::currentNM()->mkConst(
 173                     s.suffix(s.size() - lbsize));
 174               }
 175               else
 176               {
 177                 // strip partially from the back
 178                 nr.push_back(
 179                     NodeManager::currentNM()->mkConst(s.suffix(lbsize)));
 180                 n1[sindex_use] = NodeManager::currentNM()->mkConst(
 181                     s.prefix(s.size() - lbsize));
 182               }
 183               ret = true;
 184             }
 185             Assert(ArithEntail::check(curr));
 186           }
 187           else
 188           {
 189             // we cannot remove the constant
 190           }
 191         }
 192       }
 193       else
 194       {
 195         Node next_s = NodeManager::currentNM()->mkNode(
 196             MINUS,
 197             curr,
 198             NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
 199         next_s = Rewriter::rewrite(next_s);
 200         if (ArithEntail::check(next_s))
 201         {
 202           success = true;
 203           curr = next_s;
 204           sindex++;
 205         }
 206       }
 207     }
 208   } while (success);
 209   if (sindex > 0)
 210   {
 211     if (dir == 1)
 212     {
 213       nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
 214       n1.erase(n1.begin(), n1.begin() + sindex);
 215     }
 216     else
 217     {
 218       nr.insert(nr.end(), n1.end() - sindex, n1.end());
 219       n1.erase(n1.end() - sindex, n1.end());
 220     }
 221     ret = true;
 222   }
 223   return ret;
 224 }
 225
 226 int StringsEntail::componentContains(std::vector<Node>& n1,
 227                                      std::vector<Node>& n2,
 228                                      std::vector<Node>& nb,
 229                                      std::vector<Node>& ne,
 230                                      bool computeRemainder,
 231                                      int remainderDir)
 232 {
 233   Assert(nb.empty());
 234   Assert(ne.empty());
 235   // if n2 is a singleton, we can do optimized version here
 236   if (n2.size() == 1)
 237   {
 238     for (unsigned i = 0; i < n1.size(); i++)
 239     {
 240       Node n1rb;
 241       Node n1re;
 242       if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
 243       {
 244         if (computeRemainder)
 245         {
 246           n1[i] = n2[0];
 247           if (remainderDir != -1)
 248           {
 249             if (!n1re.isNull())
 250             {
 251               ne.push_back(n1re);
 252             }
 253             ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
 254             n1.erase(n1.begin() + i + 1, n1.end());
 255           }
 256           else if (!n1re.isNull())
 257           {
 258             n1[i] = Rewriter::rewrite(
 259                 NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
 260           }
 261           if (remainderDir != 1)
 262           {
 263             nb.insert(nb.end(), n1.begin(), n1.begin() + i);
 264             n1.erase(n1.begin(), n1.begin() + i);
 265             if (!n1rb.isNull())
 266             {
 267               nb.push_back(n1rb);
 268             }
 269           }
 270           else if (!n1rb.isNull())
 271           {
 272             n1[i] = Rewriter::rewrite(
 273                 NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
 274           }
 275         }
 276         return i;
 277       }
 278     }
 279   }
 280   else if (n1.size() >= n2.size())
 281   {
 282     unsigned diff = n1.size() - n2.size();
 283     for (unsigned i = 0; i <= diff; i++)
 284     {
 285       Node n1rb_first;
 286       Node n1re_first;
 287       // first component of n2 must be a suffix
 288       if (componentContainsBase(n1[i],
 289                                 n2[0],
 290                                 n1rb_first,
 291                                 n1re_first,
 292                                 1,
 293                                 computeRemainder && remainderDir != 1))
 294       {
 295         Assert(n1re_first.isNull());
 296         for (unsigned j = 1; j < n2.size(); j++)
 297         {
 298           // are we in the last component?
 299           if (j + 1 == n2.size())
 300           {
 301             Node n1rb_last;
 302             Node n1re_last;
 303             // last component of n2 must be a prefix
 304             if (componentContainsBase(n1[i + j],
 305                                       n2[j],
 306                                       n1rb_last,
 307                                       n1re_last,
 308                                       -1,
 309                                       computeRemainder && remainderDir != -1))
 310             {
 311               Assert(n1rb_last.isNull());
 312               if (computeRemainder)
 313               {
 314                 if (remainderDir != -1)
 315                 {
 316                   if (!n1re_last.isNull())
 317                   {
 318                     ne.push_back(n1re_last);
 319                   }
 320                   ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
 321                   n1.erase(n1.begin() + i + j + 1, n1.end());
 322                   n1[i + j] = n2[j];
 323                 }
 324                 if (remainderDir != 1)
 325                 {
 326                   n1[i] = n2[0];
 327                   nb.insert(nb.end(), n1.begin(), n1.begin() + i);
 328                   n1.erase(n1.begin(), n1.begin() + i);
 329                   if (!n1rb_first.isNull())
 330                   {
 331                     nb.push_back(n1rb_first);
 332                   }
 333                 }
 334               }
 335               return i;
 336             }
 337             else
 338             {
 339               break;
 340             }
 341           }
 342           else if (n1[i + j] != n2[j])
 343           {
 344             break;
 345           }
 346         }
 347       }
 348     }
 349   }
 350   return -1;
 351 }
 352
 353 bool StringsEntail::componentContainsBase(
 354     Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
 355 {
 356   Assert(n1rb.isNull());
 357   Assert(n1re.isNull());
 358
 359   NodeManager* nm = NodeManager::currentNM();
 360
 361   if (n1 == n2)
 362   {
 363     return true;
 364   }
 365   else
 366   {
 367     if (n1.isConst() && n2.isConst())
 368     {
 369       size_t len1 = Word::getLength(n1);
 370       size_t len2 = Word::getLength(n2);
 371       if (len2 < len1)
 372       {
 373         if (dir == 1)
 374         {
 375           if (Word::suffix(n1, len2) == n2)
 376           {
 377             if (computeRemainder)
 378             {
 379               n1rb = Word::prefix(n1, len1 - len2);
 380             }
 381             return true;
 382           }
 383         }
 384         else if (dir == -1)
 385         {
 386           if (Word::prefix(n1, len2) == n2)
 387           {
 388             if (computeRemainder)
 389             {
 390               n1re = Word::suffix(n1, len1 - len2);
 391             }
 392             return true;
 393           }
 394         }
 395         else
 396         {
 397           size_t f = Word::find(n1, n2);
 398           if (f != std::string::npos)
 399           {
 400             if (computeRemainder)
 401             {
 402               if (f > 0)
 403               {
 404                 n1rb = Word::prefix(n1, f);
 405               }
 406               if (len1 > f + len2)
 407               {
 408                 n1re = Word::suffix(n1, len1 - (f + len2));
 409               }
 410             }
 411             return true;
 412           }
 413         }
 414       }
 415     }
 416     else
 417     {
 418       // cases for:
 419       //   n1 = x   containing   n2 = substr( x, n2[1], n2[2] )
 420       if (n2.getKind() == STRING_SUBSTR)
 421       {
 422         if (n2[0] == n1)
 423         {
 424           bool success = true;
 425           Node start_pos = n2[1];
 426           Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
 427           Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
 428           if (dir == 1)
 429           {
 430             // To be a suffix, start + length must be greater than
 431             // or equal to the length of the string.
 432             success = ArithEntail::check(end_pos, len_n2s);
 433           }
 434           else if (dir == -1)
 435           {
 436             // To be a prefix, must literally start at 0, since
 437             //   if we knew it started at <0, it should be rewritten to "",
 438             //   if we knew it started at 0, then n2[1] should be rewritten to
 439             //   0.
 440             success = start_pos.isConst()
 441                       && start_pos.getConst<Rational>().sgn() == 0;
 442           }
 443           if (success)
 444           {
 445             if (computeRemainder)
 446             {
 447               // we can only compute the remainder if start_pos and end_pos
 448               // are known to be non-negative.
 449               if (!ArithEntail::check(start_pos)
 450                   || !ArithEntail::check(end_pos))
 451               {
 452                 return false;
 453               }
 454               if (dir != 1)
 455               {
 456                 n1rb = nm->mkNode(
 457                     STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
 458               }
 459               if (dir != -1)
 460               {
 461                 n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
 462               }
 463             }
 464             return true;
 465           }
 466         }
 467       }
 468
 469       if (!computeRemainder && dir == 0)
 470       {
 471         if (n1.getKind() == STRING_STRREPL)
 472         {
 473           // (str.contains (str.replace x y z) w) ---> true
 474           // if (str.contains x w) --> true and (str.contains z w) ---> true
 475           Node xCtnW = checkContains(n1[0], n2);
 476           if (!xCtnW.isNull() && xCtnW.getConst<bool>())
 477           {
 478             Node zCtnW = checkContains(n1[2], n2);
 479             if (!zCtnW.isNull() && zCtnW.getConst<bool>())
 480             {
 481               return true;
 482             }
 483           }
 484         }
 485       }
 486     }
 487   }
 488   return false;
 489 }
 490
 491 bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
 492                                            std::vector<Node>& n2,
 493                                            std::vector<Node>& nb,
 494                                            std::vector<Node>& ne,
 495                                            int dir)
 496 {
 497   Assert(nb.empty());
 498   Assert(ne.empty());
 499
 500   NodeManager* nm = NodeManager::currentNM();
 501   bool changed = false;
 502   // for ( forwards, backwards ) direction
 503   for (unsigned r = 0; r < 2; r++)
 504   {
 505     if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
 506     {
 507       unsigned index0 = r == 0 ? 0 : n1.size() - 1;
 508       unsigned index1 = r == 0 ? 0 : n2.size() - 1;
 509       bool removeComponent = false;
 510       Node n1cmp = n1[index0];
 511
 512       if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0)
 513       {
 514         return false;
 515       }
 516
 517       std::vector<Node> sss;
 518       std::vector<Node> sls;
 519       n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
 520       Trace("strings-rewrite-debug2")
 521           << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
 522           << ", dir = " << dir << std::endl;
 523       if (n1cmp.isConst())
 524       {
 525         CVC4::String s = n1cmp.getConst<String>();
 526         // overlap is an overapproximation of the number of characters
 527         // n2[index1] can match in s
 528         unsigned overlap = s.size();
 529         if (n2[index1].isConst())
 530         {
 531           CVC4::String t = n2[index1].getConst<String>();
 532           std::size_t ret = r == 0 ? s.find(t) : s.rfind(t);
 533           if (ret == std::string::npos)
 534           {
 535             if (n1.size() == 1)
 536             {
 537               // can remove everything
 538               //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
 539               //   str.contains( "", str.++( "ba", x ) )
 540               removeComponent = true;
 541             }
 542             else if (sss.empty())  // only if not substr
 543             {
 544               // check how much overlap there is
 545               // This is used to partially strip off the endpoint
 546               // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
 547               // str.contains( str.++( "c", x ), str.++( "cd", y ) )
 548               overlap = r == 0 ? s.overlap(t) : t.overlap(s);
 549             }
 550             else
 551             {
 552               // if we are looking at a substring, we can remove the component
 553               // if there is no overlap
 554               //   e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
 555               //        --> str.contains( x, "a" )
 556               removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
 557             }
 558           }
 559           else if (sss.empty())  // only if not substr
 560           {
 561             Assert(ret < s.size());
 562             // can strip off up to the find position, e.g.
 563             // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
 564             // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
 565             // and
 566             // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
 567             // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
 568             overlap = s.size() - ret;
 569           }
 570         }
 571         else
 572         {
 573           // inconclusive
 574         }
 575         // process the overlap
 576         if (overlap < s.size())
 577         {
 578           changed = true;
 579           if (overlap == 0)
 580           {
 581             removeComponent = true;
 582           }
 583           else
 584           {
 585             // can drop the prefix (resp. suffix) from the first (resp. last)
 586             // component
 587             if (r == 0)
 588             {
 589               nb.push_back(nm->mkConst(s.prefix(s.size() - overlap)));
 590               n1[index0] = nm->mkConst(s.suffix(overlap));
 591             }
 592             else
 593             {
 594               ne.push_back(nm->mkConst(s.suffix(s.size() - overlap)));
 595               n1[index0] = nm->mkConst(s.prefix(overlap));
 596             }
 597           }
 598         }
 599       }
 600       else if (n1cmp.getKind() == STRING_ITOS)
 601       {
 602         if (n2[index1].isConst())
 603         {
 604           CVC4::String t = n2[index1].getConst<String>();
 605
 606           if (n1.size() == 1)
 607           {
 608             // if n1.size()==1, then if n2[index1] is not a number, we can drop
 609             // the entire component
 610             //    e.g. str.contains( int.to.str(x), "123a45") --> false
 611             if (!t.isNumber())
 612             {
 613               removeComponent = true;
 614             }
 615           }
 616           else
 617           {
 618             const std::vector<unsigned>& tvec = t.getVec();
 619             Assert(tvec.size() > 0);
 620
 621             // if n1.size()>1, then if the first (resp. last) character of
 622             // n2[index1]
 623             //  is not a digit, we can drop the entire component, e.g.:
 624             //    str.contains( str.++( int.to.str(x), y ), "a12") -->
 625             //    str.contains( y, "a12" )
 626             //    str.contains( str.++( y, int.to.str(x) ), "a0b") -->
 627             //    str.contains( y, "a0b" )
 628             unsigned i = r == 0 ? 0 : (tvec.size() - 1);
 629             if (!String::isDigit(tvec[i]))
 630             {
 631               removeComponent = true;
 632             }
 633           }
 634         }
 635       }
 636       if (removeComponent)
 637       {
 638         // can drop entire first (resp. last) component
 639         if (r == 0)
 640         {
 641           nb.push_back(n1[index0]);
 642           n1.erase(n1.begin(), n1.begin() + 1);
 643         }
 644         else
 645         {
 646           ne.push_back(n1[index0]);
 647           n1.pop_back();
 648         }
 649         if (n1.empty())
 650         {
 651           // if we've removed everything, just return (we will rewrite to false)
 652           return true;
 653         }
 654         else
 655         {
 656           changed = true;
 657         }
 658       }
 659     }
 660   }
 661   // TODO (#1180) : computing the maximal overlap in this function may be
 662   // important.
 663   // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
 664   // false
 665   //   ...since str.to.int(x) can contain at most 1 character from "2aaaa",
 666   //   leaving 4 characters
 667   //      which is larger that the upper bound for length of str.substr(y,0,3),
 668   //      which is 3.
 669   return changed;
 670 }
 671
 672 Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
 673 {
 674   NodeManager* nm = NodeManager::currentNM();
 675   Node ctn = nm->mkNode(STRING_STRCTN, a, b);
 676
 677   if (fullRewriter)
 678   {
 679     ctn = Rewriter::rewrite(ctn);
 680   }
 681   else
 682   {
 683     Node prev;
 684     do
 685     {
 686       prev = ctn;
 687       ctn = d_rewriter.rewriteContains(ctn);
 688     } while (prev != ctn && ctn.getKind() == STRING_STRCTN);
 689   }
 690
 691   Assert(ctn.getType().isBoolean());
 692   return ctn.isConst() ? ctn : Node::null();
 693 }
 694
 695 bool StringsEntail::checkNonEmpty(Node a)
 696 {
 697   Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
 698   len = Rewriter::rewrite(len);
 699   return ArithEntail::check(len, true);
 700 }
 701
 702 bool StringsEntail::checkLengthOne(Node s, bool strict)
 703 {
 704   NodeManager* nm = NodeManager::currentNM();
 705   Node one = nm->mkConst(Rational(1));
 706   Node len = nm->mkNode(STRING_LENGTH, s);
 707   len = Rewriter::rewrite(len);
 708   return ArithEntail::check(one, len)
 709          && (!strict || ArithEntail::check(len, true));
 710 }
 711
 712 bool StringsEntail::checkMultisetSubset(Node a, Node b)
 713 {
 714   std::vector<Node> avec;
 715   utils::getConcat(getMultisetApproximation(a), avec);
 716   std::vector<Node> bvec;
 717   utils::getConcat(b, bvec);
 718
 719   std::map<Node, unsigned> num_nconst[2];
 720   std::map<Node, unsigned> num_const[2];
 721   for (unsigned j = 0; j < 2; j++)
 722   {
 723     std::vector<Node>& jvec = j == 0 ? avec : bvec;
 724     for (const Node& cc : jvec)
 725     {
 726       if (cc.isConst())
 727       {
 728         num_const[j][cc]++;
 729       }
 730       else
 731       {
 732         num_nconst[j][cc]++;
 733       }
 734     }
 735   }
 736   bool ms_success = true;
 737   for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
 738   {
 739     if (nncp.second > num_nconst[1][nncp.first])
 740     {
 741       ms_success = false;
 742       break;
 743     }
 744   }
 745   if (ms_success)
 746   {
 747     // count the number of constant characters in the first argument
 748     std::map<Node, unsigned> count_const[2];
 749     std::vector<Node> chars;
 750     for (unsigned j = 0; j < 2; j++)
 751     {
 752       for (std::pair<const Node, unsigned>& ncp : num_const[j])
 753       {
 754         Node cn = ncp.first;
 755         Assert(cn.isConst());
 756         std::vector<Node> cnChars = Word::getChars(cn);
 757         for (const Node& ch : cnChars)
 758         {
 759           count_const[j][ch] += ncp.second;
 760           if (std::find(chars.begin(), chars.end(), ch) == chars.end())
 761           {
 762             chars.push_back(ch);
 763           }
 764         }
 765       }
 766     }
 767     Trace("strings-entail-ms-ss")
 768         << "For " << a << " and " << b << " : " << std::endl;
 769     for (const Node& ch : chars)
 770     {
 771       Trace("strings-entail-ms-ss") << "  # occurrences of substring ";
 772       Trace("strings-entail-ms-ss") << ch << " in arguments is ";
 773       Trace("strings-entail-ms-ss")
 774           << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
 775       if (count_const[0][ch] < count_const[1][ch])
 776       {
 777         return true;
 778       }
 779     }
 780
 781     // TODO (#1180): count the number of 2,3,4,.. character substrings
 782     // for example:
 783     // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
 784     // since the second argument contains more occurrences of "bb".
 785     // note this is orthogonal reasoning to inductive reasoning
 786     // via regular membership reduction in Liang et al CAV 2015.
 787   }
 788   return false;
 789 }
 790
 791 Node StringsEntail::checkHomogeneousString(Node a)
 792 {
 793   std::vector<Node> avec;
 794   utils::getConcat(getMultisetApproximation(a), avec);
 795
 796   bool cValid = false;
 797   Node c;
 798   for (const Node& ac : avec)
 799   {
 800     if (ac.isConst())
 801     {
 802       std::vector<Node> acv = Word::getChars(ac);
 803       for (const Node& cc : acv)
 804       {
 805         if (!cValid)
 806         {
 807           cValid = true;
 808           c = cc;
 809         }
 810         else if (c != cc)
 811         {
 812           // Found a different character
 813           return Node::null();
 814         }
 815       }
 816     }
 817     else
 818     {
 819       // Could produce a different character
 820       return Node::null();
 821     }
 822   }
 823
 824   if (!cValid)
 825   {
 826     return Word::mkEmptyWord(a.getType());
 827   }
 828
 829   return c;
 830 }
 831
 832 Node StringsEntail::getMultisetApproximation(Node a)
 833 {
 834   NodeManager* nm = NodeManager::currentNM();
 835   if (a.getKind() == STRING_SUBSTR)
 836   {
 837     return a[0];
 838   }
 839   else if (a.getKind() == STRING_STRREPL)
 840   {
 841     return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
 842   }
 843   else if (a.getKind() == STRING_CONCAT)
 844   {
 845     NodeBuilder<> nb(STRING_CONCAT);
 846     for (const Node& ac : a)
 847     {
 848       nb << getMultisetApproximation(ac);
 849     }
 850     return nb.constructNode();
 851   }
 852   else
 853   {
 854     return a;
 855   }
 856 }
 857
 858 Node StringsEntail::getStringOrEmpty(Node n)
 859 {
 860   Node res;
 861   while (res.isNull())
 862   {
 863     switch (n.getKind())
 864     {
 865       case STRING_STRREPL:
 866       {
 867         if (Word::isEmpty(n[0]))
 868         {
 869           // (str.replace "" x y) --> y
 870           n = n[2];
 871           break;
 872         }
 873
 874         if (checkLengthOne(n[0]) && Word::isEmpty(n[2]))
 875         {
 876           // (str.replace "A" x "") --> "A"
 877           res = n[0];
 878           break;
 879         }
 880
 881         res = n;
 882         break;
 883       }
 884       case STRING_SUBSTR:
 885       {
 886         if (checkLengthOne(n[0]))
 887         {
 888           // (str.substr "A" x y) --> "A"
 889           res = n[0];
 890           break;
 891         }
 892         res = n;
 893         break;
 894       }
 895       default:
 896       {
 897         res = n;
 898         break;
 899       }
 900     }
 901   }
 902   return res;
 903 }
 904
 905 Node StringsEntail::inferEqsFromContains(Node x, Node y)
 906 {
 907   NodeManager* nm = NodeManager::currentNM();
 908   Node emp = Word::mkEmptyWord(x.getType());
 909   Assert(x.getType() == y.getType());
 910   TypeNode stype = x.getType();
 911
 912   Node xLen = nm->mkNode(STRING_LENGTH, x);
 913   std::vector<Node> yLens;
 914   if (y.getKind() != STRING_CONCAT)
 915   {
 916     yLens.push_back(nm->mkNode(STRING_LENGTH, y));
 917   }
 918   else
 919   {
 920     for (const Node& yi : y)
 921     {
 922       yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
 923     }
 924   }
 925
 926   std::vector<Node> zeroLens;
 927   if (x == emp)
 928   {
 929     // If x is the empty string, then all ys must be empty, too, and we can
 930     // skip the expensive checks. Note that this is just a performance
 931     // optimization.
 932     zeroLens.swap(yLens);
 933   }
 934   else
 935   {
 936     // Check if we can infer that str.len(x) <= str.len(y). If that is the
 937     // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
 938     // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
 939     // true. The terms that can have length zero without making the inequality
 940     // false must be all be empty if (str.contains x y) is true.
 941     if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens))
 942     {
 943       // We could not prove that the inequality holds
 944       return Node::null();
 945     }
 946     else if (yLens.size() == y.getNumChildren())
 947     {
 948       // We could only prove that the inequality holds but not that any of the
 949       // ys must be empty
 950       return nm->mkNode(EQUAL, x, y);
 951     }
 952   }
 953
 954   if (y.getKind() != STRING_CONCAT)
 955   {
 956     if (zeroLens.size() == 1)
 957     {
 958       // y is not a concatenation and we found that it must be empty, so just
 959       // return (= y "")
 960       Assert(zeroLens[0][0] == y);
 961       return nm->mkNode(EQUAL, y, emp);
 962     }
 963     else
 964     {
 965       Assert(yLens.size() == 1 && yLens[0][0] == y);
 966       return nm->mkNode(EQUAL, x, y);
 967     }
 968   }
 969
 970   std::vector<Node> cs;
 971   for (const Node& yiLen : yLens)
 972   {
 973     Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
 974     cs.push_back(yiLen[0]);
 975   }
 976
 977   NodeBuilder<> nb(AND);
 978   // (= x (str.++ y1' ... ym'))
 979   if (!cs.empty())
 980   {
 981     nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
 982   }
 983   // (= y1'' "") ... (= yk'' "")
 984   for (const Node& zeroLen : zeroLens)
 985   {
 986     Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
 987     nb << nm->mkNode(EQUAL, zeroLen[0], emp);
 988   }
 989
 990   // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
 991   return nb.constructNode();
 992 }
 993
 994 }  // namespace strings
 995 }  // namespace theory
 996 }  // namespace CVC4