Refactor DidYouMean (#7535)
authorGereon Kremer <nafur42@gmail.com>
Mon, 1 Nov 2021 10:52:55 +0000 (03:52 -0700)
committerGitHub <noreply@github.com>
Mon, 1 Nov 2021 10:52:55 +0000 (10:52 +0000)
This refactors the `DidYouMean` class, which is used to make helpful suggestions for misspelled tags and options. It uses `std::vector` instead of `std::set`, moves it from `options` to `util` (so that we can keep using it in both libcvc5 and the driver) and generally modernizes the code a bit. Also, it replaces the stand-alone and never executed test by a regular unit test.

src/CMakeLists.txt
src/main/options_template.cpp
src/options/didyoumean.cpp [deleted file]
src/options/didyoumean.h [deleted file]
src/options/didyoumean_test.cpp [deleted file]
src/options/options_handler.cpp
src/util/CMakeLists.txt
src/util/didyoumean.cpp [new file with mode: 0644]
src/util/didyoumean.h [new file with mode: 0644]
test/unit/util/CMakeLists.txt
test/unit/util/didyoumean_black.cpp [new file with mode: 0644]

index d87d5155904d93ecfad4bc6c125a914ef0e486dd..721ccebe9a382564fe363a2053bafb8d85f42b30 100644 (file)
@@ -51,8 +51,6 @@ libcvc5_add_sources(
   omt/omt_optimizer.cpp
   omt/omt_optimizer.h
   options/decision_weight.h
-  options/didyoumean.cpp
-  options/didyoumean.h
   options/language.cpp
   options/language.h
   options/managed_streams.cpp
index 138821c71987a2b6fb9cfecd842ae907baceb803..e0429c5f91f70d57dfbcf51df9fcd929e9a414f8 100644 (file)
@@ -37,8 +37,8 @@ extern int optreset;
 
 #include "base/check.h"
 #include "base/output.h"
-#include "options/didyoumean.h"
 #include "options/option_exception.h"
+#include "util/didyoumean.h"
 
 #include <cstring>
 #include <iostream>
diff --git a/src/options/didyoumean.cpp b/src/options/didyoumean.cpp
deleted file mode 100644 (file)
index 06abd51..0000000
+++ /dev/null
@@ -1,169 +0,0 @@
-/******************************************************************************
- * Top contributors (to current version):
- *   Kshitij Bansal, Tim King, Clark Barrett
- *
- * This file is part of the cvc5 project.
- *
- * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
- * in the top-level source directory and their institutional affiliations.
- * All rights reserved.  See the file COPYING in the top-level source
- * directory for licensing information.
- * ****************************************************************************
- *
- * Did-you-mean style suggestions.
- *
- * ``What do you mean? I don't understand.'' An attempt to be more
- * helpful than that. Similar to one in git.
- *
- * There are no dependencies on cvc5 (except namespace).
- */
-
-#include "options/didyoumean.h"
-
-#include <iostream>
-#include <set>
-#include <sstream>
-#include <string>
-#include <vector>
-
-namespace cvc5 {
-
-std::vector<std::string> DidYouMean::getMatch(const std::string& input)
-{
-  /** Magic numbers */
-  const int similarityThreshold = 7;
-  const unsigned numMatchesThreshold = 10;
-
-  typedef std::set<std::pair<int, std::string> > ScoreSet;
-  ScoreSet scores;
-  std::vector<std::string> ret;
-  for (Words::const_iterator it = d_words.begin(); it != d_words.end(); ++it) {
-    std::string s = (*it);
-    if (s == input) {
-      // if input matches AS-IS just return that
-      ret.push_back(s);
-      return ret;
-    }
-    int score;
-    if (s.compare(0, input.size(), input) == 0) {
-      score = 0;
-    } else {
-      score = editDistance(input, s) + 1;
-    }
-    scores.insert(make_pair(score, s));
-  }
-  int min_score = scores.begin()->first;
-  for (ScoreSet::const_iterator i = scores.begin(); i != scores.end(); ++i) {
-    // add if score is overall not too big, and also not much close to
-    // the score of the best suggestion
-    if (i->first < similarityThreshold && i->first <= min_score + 1) {
-      ret.push_back(i->second);
-#ifdef DIDYOUMEAN_DEBUG
-      cout << i->second << ": " << i->first << std::endl;
-#endif
-    }
-  }
-  if (ret.size() > numMatchesThreshold) {
-    ret.resize(numMatchesThreshold);
-  }
-  return ret;
-}
-
-int DidYouMean::editDistance(const std::string& a, const std::string& b) {
-  // input string: a
-  // desired string: b
-
-  const size_t swapCost = 0;
-  const size_t substituteCost = 2;
-  const size_t addCost = 1;
-  const size_t deleteCost = 3;
-  const size_t switchCaseCost = 0;
-
-  size_t len1 = a.size();
-  size_t len2 = b.size();
-
-  size_t* C[3];
-  size_t ii;
-  for (ii = 0; ii < 3; ++ii) {
-    C[ii] = new size_t[len2 + 1];
-  }
-  //  int C[3][len2+1];             // cost
-
-  for (size_t j = 0; j <= len2; ++j)
-  {
-    C[0][j] = j * addCost;
-  }
-
-  for (size_t i = 1; i <= len1; ++i)
-  {
-    size_t cur = i % 3;
-    size_t prv = (i + 2) % 3;
-    size_t pr2 = (i + 1) % 3;
-
-    C[cur][0] = i * deleteCost;
-
-    for (size_t j = 1; j <= len2; ++j)
-    {
-      C[cur][j] = 100000000;  // INF
-
-      if (a[i - 1] == b[j - 1]) {
-        // match
-        C[cur][j] = std::min(C[cur][j], C[prv][j - 1]);
-      } else if (tolower(a[i - 1]) == tolower(b[j - 1])) {
-        // switch case
-        C[cur][j] = std::min(C[cur][j], C[prv][j - 1] + switchCaseCost);
-      } else {
-        // substitute
-        C[cur][j] = std::min(C[cur][j], C[prv][j - 1] + substituteCost);
-      }
-
-      // swap
-      if (i >= 2 && j >= 2 && a[i - 1] == b[j - 2] && a[i - 2] == b[j - 1]) {
-        C[cur][j] = std::min(C[cur][j], C[pr2][j - 2] + swapCost);
-      }
-
-      // add
-      C[cur][j] = std::min(C[cur][j], C[cur][j - 1] + addCost);
-
-      // delete
-      C[cur][j] = std::min(C[cur][j], C[prv][j] + deleteCost);
-
-#ifdef DIDYOUMEAN_DEBUG1
-      std::cout << "C[" << cur << "][" << 0 << "] = " << C[cur][0] << std::endl;
-#endif
-    }
-  }
-  int result = C[len1 % 3][len2];
-  for (ii = 0; ii < 3; ++ii) {
-    delete[] C[ii];
-  }
-  return result;
-}
-
-std::string DidYouMean::getMatchAsString(const std::string& input,
-                                         uint64_t prefixNewLines,
-                                         uint64_t suffixNewLines)
-{
-  std::vector<std::string> matches = getMatch(input);
-  std::ostringstream oss;
-  if (matches.size() > 0) {
-    while (prefixNewLines-- > 0) {
-      oss << std::endl;
-    }
-    if (matches.size() == 1) {
-      oss << "Did you mean this?";
-    } else {
-      oss << "Did you mean any of these?";
-    }
-    for (size_t i = 0; i < matches.size(); ++i)
-    {
-      oss << "\n        " << matches[i];
-    }
-    while (suffixNewLines-- > 0) {
-      oss << std::endl;
-    }
-  }
-  return oss.str();
-}
-
-}  // namespace cvc5
diff --git a/src/options/didyoumean.h b/src/options/didyoumean.h
deleted file mode 100644 (file)
index 0118f14..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/******************************************************************************
- * Top contributors (to current version):
- *   Kshitij Bansal, Tim King
- *
- * This file is part of the cvc5 project.
- *
- * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
- * in the top-level source directory and their institutional affiliations.
- * All rights reserved.  See the file COPYING in the top-level source
- * directory for licensing information.
- * ****************************************************************************
- *
- * Did-you-mean style suggestions.
- *
- * ``What do you mean? I don't understand.'' An attempt to be more
- * helpful than that. Similar to one in git.
- *
- * There are no dependencies on cvc5 (except namespace).
- */
-
-#pragma once
-
-#include "cvc5_export.h"
-
-#include <set>
-#include <string>
-#include <vector>
-
-namespace cvc5 {
-
-class CVC5_EXPORT DidYouMean {
- public:
-  using Words = std::set<std::string>;
-
-  DidYouMean() {}
-  ~DidYouMean() {}
-
-  void addWord(std::string word) { d_words.insert(std::move(word)); }
-  void addWords(const std::vector<std::string>& words)
-  {
-    d_words.insert(words.begin(), words.end());
-  }
-
-  std::vector<std::string> getMatch(const std::string& input);
-
-  /**
-   * This is provided to make it easier to ensure consistency of
-   * output. Returned string is empty if there are no matches.
-   */
-  std::string getMatchAsString(const std::string& input,
-                               uint64_t prefixNewLines = 2,
-                               uint64_t suffixNewLines = 0);
-
- private:
-  int editDistance(const std::string& a, const std::string& b);
-  Words d_words;
-};
-
-}  // namespace cvc5
diff --git a/src/options/didyoumean_test.cpp b/src/options/didyoumean_test.cpp
deleted file mode 100644 (file)
index 9da20ce..0000000
+++ /dev/null
@@ -1,763 +0,0 @@
-/******************************************************************************
- * Top contributors (to current version):
- *   Kshitij Bansal, Tim King, Andrew Reynolds
- *
- * This file is part of the cvc5 project.
- *
- * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
- * in the top-level source directory and their institutional affiliations.
- * All rights reserved.  See the file COPYING in the top-level source
- * directory for licensing information.
- * ****************************************************************************
- *
- * [[ Add one-line brief description here ]]
- *
- * [[ Add lengthier description here ]]
- * \todo document this file
- */
-
-// This is not built as a part of cvc5 and is not built by Makefile.am.
-// Compile: g++ didyoumean_test.cpp didyoumean.cpp
-// For debug compile with -DDIDYOUMEAN_DEBUG or -DDIDYOUMEAN_DEBUG1 or both
-
-#include "didyoumean.h"
-#include <iostream>
-
-using namespace std;
-using namespace cvc5;
-
-set<string> getDebugTags();
-set<string> getOptionStrings();
-
-int main() {
-  string a, b;
-
-  cin >> a;
-  cout << "Matches with debug tags:" << endl;
-  for (const string& s : DidYouMean(getDebugTags()).getMatch(a)) {
-    cout << s << endl;
-  }
-  cout << "Matches with option strings:" << endl;
-  for (const string& s : DidYouMean(getOptionStrings()).getMatch(a)) {
-    cout << s << endl;
-  }
-}
-
-set<string> getDebugTags() {
-  set<string> a;
-  a.insert("CDInsertHashMap");
-  a.insert("CDTrailHashMap");
-  a.insert("TrailHashMap");
-  a.insert("approx");
-  a.insert("approx::");
-  a.insert("approx::branch");
-  a.insert("approx::checkCutOnPad");
-  a.insert("approx::constraint");
-  a.insert("approx::gmi");
-  a.insert("approx::gmiCut");
-  a.insert("approx::guessIsConstructable");
-  a.insert("approx::lemmas");
-  a.insert("approx::mir");
-  a.insert("approx::mirCut");
-  a.insert("approx::nodelog");
-  a.insert("approx::replayAssert");
-  a.insert("approx::replayLogRec");
-  a.insert("approx::soi");
-  a.insert("approx::solveMIP");
-  a.insert("approx::sumConstraints");
-  a.insert("approx::vars");
-  a.insert("arith");
-  a.insert("arith::addSharedTerm");
-  a.insert("arith::approx::cuts");
-  a.insert("arith::arithvar");
-  a.insert("arith::asVectors");
-  a.insert("arith::bt");
-  a.insert("arith::collectModelInfo");
-  a.insert("arith::conflict");
-  a.insert("arith::congruenceManager");
-  a.insert("arith::congruences");
-  a.insert("arith::consistency");
-  a.insert("arith::consistency::comitonconflict");
-  a.insert("arith::consistency::final");
-  a.insert("arith::consistency::initial");
-  a.insert("arith::constraint");
-  a.insert("arith::dio");
-  a.insert("arith::dio::ex");
-  a.insert("arith::dio::max");
-  a.insert("arith::div");
-  a.insert("arith::dual");
-  a.insert("arith::ems");
-  a.insert("arith::eq");
-  a.insert("arith::error::mem");
-  a.insert("arith::explain");
-  a.insert("arith::explainNonbasics");
-  a.insert("arith::findModel");
-  a.insert("arith::focus");
-  a.insert("arith::hasIntegerModel");
-  a.insert("arith::importSolution");
-  a.insert("arith::ite");
-  a.insert("arith::ite::red");
-  a.insert("arith::learned");
-  a.insert("arith::lemma");
-  a.insert("arith::nf");
-  a.insert("arith::oldprop");
-  a.insert("arith::pivot");
-  a.insert("arith::preprocess");
-  a.insert("arith::preregister");
-  a.insert("arith::presolve");
-  a.insert("arith::print_assertions");
-  a.insert("arith::print_model");
-  a.insert("arith::prop");
-  a.insert("arith::resolveOutPropagated");
-  a.insert("arith::restart");
-  a.insert("arith::rewriter");
-  a.insert("arith::selectPrimalUpdate");
-  a.insert("arith::simplex:row");
-  a.insert("arith::solveInteger");
-  a.insert("arith::static");
-  a.insert("arith::subsumption");
-  a.insert("arith::tracking");
-  a.insert("arith::tracking::mid");
-  a.insert("arith::tracking::post");
-  a.insert("arith::tracking::pre");
-  a.insert("arith::unate");
-  a.insert("arith::unate::conf");
-  a.insert("arith::update");
-  a.insert("arith::update::select");
-  a.insert("arith::value");
-  a.insert("array-pf");
-  a.insert("array-types");
-  a.insert("arrays");
-  a.insert("arrays-model-based");
-  a.insert("arrays::propagate");
-  a.insert("arrays::sharing");
-  a.insert("attrgc");
-  a.insert("basicsAtBounds");
-  a.insert("bitvector");
-  a.insert("bitvector-bb");
-  a.insert("bitvector-bitblast");
-  a.insert("bitvector-expandDefinition");
-  a.insert("bitvector-model");
-  a.insert("bitvector-preregister");
-  a.insert("bitvector-rewrite");
-  a.insert("bitvector::bitblaster");
-  a.insert("bitvector::core");
-  a.insert("bitvector::explain");
-  a.insert("bitvector::propagate");
-  a.insert("bitvector::sharing");
-  a.insert("bool-flatten");
-  a.insert("bool-ite");
-  a.insert("boolean-terms");
-  a.insert("bt");
-  a.insert("builder");
-  a.insert("bv-bitblast");
-  a.insert("bv-core");
-  a.insert("bv-core-model");
-  a.insert("bv-inequality");
-  a.insert("bv-inequality-explain");
-  a.insert("bv-inequality-internal");
-  a.insert("bv-rewrite");
-  a.insert("bv-slicer");
-  a.insert("bv-slicer-eq");
-  a.insert("bv-slicer-uf");
-  a.insert("bv-subtheory-inequality");
-  a.insert("bv-to-bool");
-  a.insert("bva");
-  a.insert("bvminisat");
-  a.insert("bvminisat::explain");
-  a.insert("bvminisat::search");
-  a.insert("cegqi");
-  a.insert("cegqi-debug");
-  a.insert("cegqi-prop-as-dec");
-  a.insert("cd_set_collection");
-  a.insert("cdlist");
-  a.insert("cdlist:cmm");
-  a.insert("cdqueue");
-  a.insert("check-inst");
-  a.insert("check-model::rep-checking");
-  a.insert("circuit-prop");
-  a.insert("cnf");
-  a.insert("constructInfeasiblityFunction");
-  a.insert("context");
-  a.insert("current");
-  a.insert("datatypes");
-  a.insert("datatypes-cycle-check");
-  a.insert("datatypes-cycles");
-  a.insert("datatypes-cycles-find");
-  a.insert("datatypes-debug");
-  a.insert("datatypes-explain");
-  a.insert("datatypes-gt");
-  a.insert("datatypes-inst");
-  a.insert("datatypes-labels");
-  a.insert("datatypes-output");
-  a.insert("datatypes-parametric");
-  a.insert("datatypes-prereg");
-  a.insert("datatypes-split");
-  a.insert("decision");
-  a.insert("decision::jh");
-  a.insert("determineArithVar");
-  a.insert("diamonds");
-  a.insert("dio::push");
-  a.insert("dt");
-  a.insert("dt-enum");
-  a.insert("dt-warn");
-  a.insert("dt::propagate");
-  a.insert("dualLike");
-  a.insert("effortlevel");
-  a.insert("ensureLiteral");
-  a.insert("eq");
-  a.insert("equality");
-  a.insert("equality::backtrack");
-  a.insert("equality::disequality");
-  a.insert("equality::evaluation");
-  a.insert("equality::graph");
-  a.insert("equality::internal");
-  a.insert("equality::trigger");
-  a.insert("equalsConstant");
-  a.insert("error");
-  a.insert("estimateWithCFE");
-  a.insert("expand");
-  a.insert("export");
-  a.insert("flipdec");
-  a.insert("fmc-entry-trie");
-  a.insert("fmc-interval-model-debug");
-  a.insert("fmf-card-debug");
-  a.insert("fmf-eval-debug");
-  a.insert("fmf-eval-debug2");
-  a.insert("fmf-exit");
-  a.insert("fmf-index-order");
-  a.insert("fmf-model-complete");
-  a.insert("fmf-model-cons");
-  a.insert("fmf-model-cons-debug");
-  a.insert("fmf-model-eval");
-  a.insert("fmf-model-prefs");
-  a.insert("fmf-model-req");
-  a.insert("focusDownToJust");
-  a.insert("focusDownToLastHalf");
-  a.insert("foo");
-  a.insert("gaussianElimConstructTableRow");
-  a.insert("gc");
-  a.insert("gc:leaks");
-  a.insert("getBestImpliedBound");
-  a.insert("getCeiling");
-  a.insert("getNewDomainValue");
-  a.insert("getPropagatedLiterals");
-  a.insert("getType");
-  a.insert("glpk::loadVB");
-  a.insert("guessCoefficientsConstructTableRow");
-  a.insert("guessIsConstructable");
-  a.insert("handleBorders");
-  a.insert("includeBoundUpdate");
-  a.insert("inst");
-  a.insert("inst-engine");
-  a.insert("inst-engine-ctrl");
-  a.insert("inst-engine-debug");
-  a.insert("inst-engine-phase-req");
-  a.insert("inst-engine-stuck");
-  a.insert("inst-fmf-ei");
-  a.insert("inst-match-gen");
-  a.insert("inst-trigger");
-  a.insert("integers");
-  a.insert("interactive");
-  a.insert("intersectConstantIte");
-  a.insert("isConst");
-  a.insert("ite");
-  a.insert("ite::atom");
-  a.insert("ite::constantIteEqualsConstant");
-  a.insert("ite::print-success");
-  a.insert("ite::simpite");
-  a.insert("lemma-ites");
-  a.insert("lemmaInputChannel");
-  a.insert("literal-matching");
-  a.insert("logPivot");
-  a.insert("matrix");
-  a.insert("minisat");
-  a.insert("minisat::lemmas");
-  a.insert("minisat::pop");
-  a.insert("minisat::remove-clause");
-  a.insert("minisat::search");
-  a.insert("miplib");
-  a.insert("model");
-  a.insert("model-getvalue");
-  a.insert("nf::tmp");
-  a.insert("nm");
-  a.insert("normal-form");
-  a.insert("options");
-  a.insert("paranoid:check_tableau");
-  a.insert("parser");
-  a.insert("parser-extra");
-  a.insert("parser-idt");
-  a.insert("parser-param");
-  a.insert("partial_model");
-  a.insert("pb");
-  a.insert("pipe");
-  a.insert("portfolio::outputmode");
-  a.insert("prec");
-  a.insert("preemptGetopt");
-  a.insert("proof:sat");
-  a.insert("proof:sat:detailed");
-  a.insert("prop");
-  a.insert("prop-explain");
-  a.insert("prop-value");
-  a.insert("prop::lemmas");
-  a.insert("propagateAsDecision");
-  a.insert("properConflict");
-  a.insert("qcf-ccbe");
-  a.insert("qcf-check-inst");
-  a.insert("qcf-eval");
-  a.insert("qcf-match");
-  a.insert("qcf-match-debug");
-  a.insert("qcf-nground");
-  a.insert("qint-check-debug2");
-  a.insert("qint-debug");
-  a.insert("qint-error");
-  a.insert("qint-model-debug");
-  a.insert("qint-model-debug2");
-  a.insert("qint-var-order-debug2");
-  a.insert("quant-arith");
-  a.insert("quant-arith-debug");
-  a.insert("quant-arith-simplex");
-  a.insert("quant-datatypes");
-  a.insert("quant-datatypes-debug");
-  a.insert("quant-req-phase");
-  a.insert("quant-uf-strategy");
-  a.insert("quantifiers");
-  a.insert("quantifiers-assert");
-  a.insert("quantifiers-check");
-  a.insert("quantifiers-dec");
-  a.insert("quantifiers-engine");
-  a.insert("quantifiers-flip");
-  a.insert("quantifiers-other");
-  a.insert("quantifiers-prereg");
-  a.insert("quantifiers-presolve");
-  a.insert("quantifiers-relevance");
-  a.insert("quantifiers-sat");
-  a.insert("quantifiers-substitute-debug");
-  a.insert("quantifiers::collectModelInfo");
-  a.insert("queueConditions");
-  a.insert("rationalToCfe");
-  a.insert("recentlyViolated");
-  a.insert("register");
-  a.insert("register::internal");
-  a.insert("relevant-trigger");
-  a.insert("removeFixed");
-  a.insert("rl");
-  a.insert("sat::minisat");
-  a.insert("selectFocusImproving");
-  a.insert("set_collection");
-  a.insert("sets");
-  a.insert("sets-assert");
-  a.insert("sets-checkmodel-ignore");
-  a.insert("sets-eq");
-  a.insert("sets-learn");
-  a.insert("sets-lemma");
-  a.insert("sets-model");
-  a.insert("sets-model-details");
-  a.insert("sets-parent");
-  a.insert("sets-pending");
-  a.insert("sets-prop");
-  a.insert("sets-prop-details");
-  a.insert("sets-scrutinize");
-  a.insert("sets-terminfo");
-  a.insert("shared");
-  a.insert("shared-terms-database");
-  a.insert("shared-terms-database::assert");
-  a.insert("sharing");
-  a.insert("simple-trigger");
-  a.insert("simplify");
-  a.insert("smart-multi-trigger");
-  a.insert("smt");
-  a.insert("soi::findModel");
-  a.insert("soi::selectPrimalUpdate");
-  a.insert("solveRealRelaxation");
-  a.insert("sort");
-  a.insert("speculativeUpdate");
-  a.insert("strings");
-  a.insert("strings-explain");
-  a.insert("strings-explain-debug");
-  a.insert("strings-prereg");
-  a.insert("strings-propagate");
-  a.insert("substitution");
-  a.insert("substitution::internal");
-  a.insert("tableau");
-  a.insert("te");
-  a.insert("term-db-cong");
-  a.insert("theory");
-  a.insert("theory::assertions");
-  a.insert("theory::atoms");
-  a.insert("theory::bv::rewrite");
-  a.insert("theory::conflict");
-  a.insert("theory::explain");
-  a.insert("theory::idl");
-  a.insert("theory::idl::model");
-  a.insert("theory::internal");
-  a.insert("theory::propagate");
-  a.insert("trans-closure");
-  a.insert("treat-unknown-error");
-  a.insert("tuprec");
-  a.insert("typecheck-idt");
-  a.insert("typecheck-q");
-  a.insert("typecheck-r");
-  a.insert("uf");
-  a.insert("uf-ss");
-  a.insert("uf-ss-check-region");
-  a.insert("uf-ss-cliques");
-  a.insert("uf-ss-debug");
-  a.insert("uf-ss-disequal");
-  a.insert("uf-ss-na");
-  a.insert("uf-ss-region");
-  a.insert("uf-ss-region-debug");
-  a.insert("uf-ss-register");
-  a.insert("uf-ss-sat");
-  a.insert("uf::propagate");
-  a.insert("uf::sharing");
-  a.insert("ufgc");
-  a.insert("ufsymm");
-  a.insert("ufsymm:clauses");
-  a.insert("ufsymm:eq");
-  a.insert("ufsymm:match");
-  a.insert("ufsymm:norm");
-  a.insert("ufsymm:p");
-  a.insert("update");
-  a.insert("updateAndSignal");
-  a.insert("weak");
-  a.insert("whytheoryenginewhy");
-  return a;
-}
-
-set<string> getOptionStrings() {
-  const char* cmdlineOptions[] = {
-      "lang",
-      "output-lang",
-      "language",
-      "output-language",
-      "verbose",
-      "quiet",
-      "stats",
-      "no-stats",
-      "statistics",
-      "no-statistics",
-      "stats-every-query",
-      "no-stats-every-query",
-      "statistics-every-query",
-      "no-statistics-every-query",
-      "parse-only",
-      "no-parse-only",
-      "preprocess-only",
-      "no-preprocess-only",
-      "trace",
-      "debug",
-      "print-success",
-      "no-print-success",
-      "smtlib-strict",
-      "default-expr-depth",
-      "default-dag-thresh",
-      "print-expr-types",
-      "eager-type-checking",
-      "lazy-type-checking",
-      "no-type-checking",
-      "biased-ites",
-      "no-biased-ites",
-      "boolean-term-conversion-mode",
-      "theoryof-mode",
-      "use-theory",
-      "bitblast-eager",
-      "no-bitblast-eager",
-      "bitblast-share-lemmas",
-      "no-bitblast-share-lemmas",
-      "bitblast-eager-fullcheck",
-      "no-bitblast-eager-fullcheck",
-      "bv-inequality-solver",
-      "no-bv-inequality-solver",
-      "bv-core-solver",
-      "no-bv-core-solver",
-      "bv-to-bool",
-      "no-bv-to-bool",
-      "bv-propagate",
-      "no-bv-propagate",
-      "bv-eq",
-      "no-bv-eq",
-      "dt-rewrite-error-sel",
-      "no-dt-rewrite-error-sel",
-      "dt-force-assignment",
-      "unate-lemmas",
-      "arith-prop",
-      "heuristic-pivots",
-      "standard-effort-variable-order-pivots",
-      "error-selection-rule",
-      "simplex-check-period",
-      "pivot-threshold",
-      "prop-row-length",
-      "disable-dio-solver",
-      "enable-arith-rewrite-equalities",
-      "disable-arith-rewrite-equalities",
-      "enable-miplib-trick",
-      "disable-miplib-trick",
-      "miplib-trick-subs",
-      "cut-all-bounded",
-      "no-cut-all-bounded",
-      "maxCutsInContext",
-      "revert-arith-models-on-unsat",
-      "no-revert-arith-models-on-unsat",
-      "fc-penalties",
-      "no-fc-penalties",
-      "use-fcsimplex",
-      "no-use-fcsimplex",
-      "use-soi",
-      "no-use-soi",
-      "restrict-pivots",
-      "no-restrict-pivots",
-      "collect-pivot-stats",
-      "no-collect-pivot-stats",
-      "use-approx",
-      "no-use-approx",
-      "approx-branch-depth",
-      "dio-decomps",
-      "no-dio-decomps",
-      "new-prop",
-      "no-new-prop",
-      "arith-prop-clauses",
-      "soi-qe",
-      "no-soi-qe",
-      "rewrite-divk",
-      "no-rewrite-divk",
-      "se-solve-int",
-      "no-se-solve-int",
-      "lemmas-on-replay-failure",
-      "no-lemmas-on-replay-failure",
-      "dio-turns",
-      "rr-turns",
-      "dio-repeat",
-      "no-dio-repeat",
-      "replay-early-close-depth",
-      "replay-num-err-penalty",
-      "replay-reject-cut",
-      "replay-lemma-reject-cut",
-      "replay-soi-major-threshold",
-      "replay-soi-major-threshold-pen",
-      "replay-soi-minor-threshold",
-      "replay-soi-minor-threshold-pen",
-      "symmetry-breaker",
-      "no-symmetry-breaker",
-      "condense-function-values",
-      "no-condense-function-values",
-      "disable-uf-ss-regions",
-      "uf-ss-eager-split",
-      "no-uf-ss-eager-split",
-      "uf-ss-totality",
-      "no-uf-ss-totality",
-      "uf-ss-totality-limited",
-      "uf-ss-totality-sym-break",
-      "no-uf-ss-totality-sym-break",
-      "uf-ss-abort-card",
-      "uf-ss-explained-cliques",
-      "no-uf-ss-explained-cliques",
-      "uf-ss-simple-cliques",
-      "no-uf-ss-simple-cliques",
-      "uf-ss-deq-prop",
-      "no-uf-ss-deq-prop",
-      "disable-uf-ss-min-model",
-      "uf-ss-clique-splits",
-      "no-uf-ss-clique-splits",
-      "uf-ss-sym-break",
-      "no-uf-ss-sym-break",
-      "uf-ss-fair",
-      "no-uf-ss-fair",
-      "arrays-optimize-linear",
-      "no-arrays-optimize-linear",
-      "arrays-lazy-rintro1",
-      "no-arrays-lazy-rintro1",
-      "arrays-model-based",
-      "no-arrays-model-based",
-      "arrays-eager-index",
-      "no-arrays-eager-index",
-      "arrays-eager-lemmas",
-      "no-arrays-eager-lemmas",
-      "disable-miniscope-quant",
-      "disable-miniscope-quant-fv",
-      "disable-prenex-quant",
-      "disable-var-elim-quant",
-      "disable-ite-lift-quant",
-      "cnf-quant",
-      "no-cnf-quant",
-      "clause-split",
-      "no-clause-split",
-      "pre-skolem-quant",
-      "no-pre-skolem-quant",
-      "ag-miniscope-quant",
-      "no-ag-miniscope-quant",
-      "macros-quant",
-      "no-macros-quant",
-      "fo-prop-quant",
-      "no-fo-prop-quant",
-      "disable-smart-triggers",
-      "relevant-triggers",
-      "no-relevant-triggers",
-      "relational-triggers",
-      "no-relational-triggers",
-      "register-quant-body-terms",
-      "no-register-quant-body-terms",
-      "inst-when",
-      "eager-inst-quant",
-      "no-eager-inst-quant",
-      "full-saturate-quant",
-      "no-full-saturate-quant",
-      "literal-matching",
-      "enable-cbqi",
-      "no-enable-cbqi",
-      "cegqi-recurse",
-      "no-cbqi-recurse",
-      "user-pat",
-      "flip-decision",
-      "disable-quant-internal-reps",
-      "finite-model-find",
-      "no-finite-model-find",
-      "mbqi",
-      "mbqi-one-inst-per-round",
-      "no-mbqi-one-inst-per-round",
-      "mbqi-one-quant-per-round",
-      "no-mbqi-one-quant-per-round",
-      "fmf-inst-engine",
-      "no-fmf-inst-engine",
-      "disable-fmf-inst-gen",
-      "fmf-inst-gen-one-quant-per-round",
-      "no-fmf-inst-gen-one-quant-per-round",
-      "fmf-fresh-dc",
-      "no-fmf-fresh-dc",
-      "disable-fmf-fmc-simple",
-      "fmf-bound-int",
-      "no-fmf-bound-int",
-      "axiom-inst",
-      "quant-cf",
-      "no-quant-cf",
-      "quant-cf-mode",
-      "quant-cf-when",
-      "rewrite-rules",
-      "no-rewrite-rules",
-      "rr-one-inst-per-round",
-      "no-rr-one-inst-per-round",
-      "strings-exp",
-      "no-strings-exp",
-      "strings-lb",
-      "strings-fmf",
-      "no-strings-fmf",
-      "strings-eit",
-      "no-strings-eit",
-      "strings-alphabet-card",
-      "show-sat-solvers",
-      "random-freq",
-      "random-seed",
-      "restart-int-base",
-      "restart-int-inc",
-      "refine-conflicts",
-      "no-refine-conflicts",
-      "minisat-elimination",
-      "no-minisat-elimination",
-      "minisat-dump-dimacs",
-      "no-minisat-dump-dimacs",
-      "model-format",
-      "dump",
-      "dump-to",
-      "force-logic",
-      "simplification",
-      "no-simplification",
-      "static-learning",
-      "no-static-learning",
-      "produce-models",
-      "no-produce-models",
-      "check-models",
-      "no-check-models",
-      "dump-models",
-      "no-dump-models",
-      "proof",
-      "no-proof",
-      "check-proofs",
-      "no-check-proofs",
-      "dump-proofs",
-      "no-dump-proofs",
-      "produce-unsat-cores",
-      "no-produce-unsat-cores",
-      "produce-assignments",
-      "no-produce-assignments",
-      "interactive",
-      "no-interactive",
-      "ite-simp",
-      "no-ite-simp",
-      "on-repeat-ite-simp",
-      "no-on-repeat-ite-simp",
-      "simp-with-care",
-      "no-simp-with-care",
-      "simp-ite-compress",
-      "no-simp-ite-compress",
-      "unconstrained-simp",
-      "no-unconstrained-simp",
-      "repeat-simp",
-      "no-repeat-simp",
-      "simp-ite-hunt-zombies",
-      "sort-inference",
-      "no-sort-inference",
-      "incremental",
-      "no-incremental",
-      "abstract-values",
-      "no-abstract-values",
-      "model-u-dt-enum",
-      "no-model-u-dt-enum",
-      "tlimit",
-      "tlimit-per",
-      "rlimit",
-      "rlimit-per",
-      "rewrite-apply-to-const",
-      "no-rewrite-apply-to-const",
-      "replay",
-      "replay-log",
-      "decision",
-      "decision-threshold",
-      "decision-use-weight",
-      "no-decision-use-weight",
-      "decision-random-weight",
-      "decision-weight-internal",
-      "version",
-      "license",
-      "help",
-      "show-config",
-      "show-debug-tags",
-      "show-trace-tags",
-      "early-exit",
-      "no-early-exit",
-      "threads",
-      "threadN",
-      "filter-lemma-length",
-      "fallback-sequential",
-      "no-fallback-sequential",
-      "incremental-parallel",
-      "no-incremental-parallel",
-      "no-interactive-prompt",
-      "immediate-exit",
-      "segv-spin",
-      "no-segv-spin",
-      "segv-nospin",
-      "wait-to-join",
-      "no-wait-to-join",
-      "strict-parsing",
-      "no-strict-parsing",
-      "mmap",
-      "no-mmap",
-      "no-checking",
-      "no-filesystem-access",
-      "no-include-file",
-      "enable-idl-rewrite-equalities",
-      "disable-idl-rewrite-equalities",
-      "sets-propagate",
-      "no-sets-propagate",
-      "sets-eager-lemmas",
-      "no-sets-eager-lemmas",
-      NULL,
-  }; /* cmdlineOptions */
-  int i = 0;
-  set<string> ret;
-  while (cmdlineOptions[i] != NULL) {
-    ret.insert(cmdlineOptions[i]);
-    i++;
-  }
-  return ret;
-}
index 063145ea8ee0c4ee7338f64609a5a45511252244..feeb172fb98f6684ae4f87ce75d30183b2eb6446 100644 (file)
@@ -32,7 +32,6 @@
 #include "options/base_options.h"
 #include "options/bv_options.h"
 #include "options/decision_options.h"
-#include "options/didyoumean.h"
 #include "options/language.h"
 #include "options/option_exception.h"
 #include "options/set_language.h"
@@ -40,6 +39,7 @@
 #include "options/theory_options.h"
 #include "smt/command.h"
 #include "smt/dump.h"
+#include "util/didyoumean.h"
 
 namespace cvc5 {
 namespace options {
index ad277cbb603f1f907748d6bf9336052a0e3568ea..2ac529565d452c2d5de6f7c18857827255c8e342 100644 (file)
@@ -29,6 +29,8 @@ libcvc5_add_sources(
   cardinality_class.cpp
   cardinality_class.h
   dense_map.h
+  didyoumean.cpp
+  didyoumean.h
   divisible.cpp
   divisible.h
   floatingpoint.cpp
diff --git a/src/util/didyoumean.cpp b/src/util/didyoumean.cpp
new file mode 100644 (file)
index 0000000..0f95722
--- /dev/null
@@ -0,0 +1,157 @@
+/******************************************************************************
+ * Top contributors (to current version):
+ *   Kshitij Bansal, Tim King, Clark Barrett
+ *
+ * This file is part of the cvc5 project.
+ *
+ * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
+ * in the top-level source directory and their institutional affiliations.
+ * All rights reserved.  See the file COPYING in the top-level source
+ * directory for licensing information.
+ * ****************************************************************************
+ *
+ * Did-you-mean style suggestions.
+ *
+ * ``What do you mean? I don't understand.'' An attempt to be more
+ * helpful than that. Similar to one in git.
+ *
+ * There are no dependencies on cvc5 (except namespace).
+ */
+
+#include "util/didyoumean.h"
+
+#include <algorithm>
+#include <array>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace cvc5 {
+
+namespace {
+
+uint64_t editDistance(const std::string& a, const std::string& b) {
+  // input string: a
+  // desired string: b
+
+  constexpr uint64_t swapCost = 0;
+  constexpr uint64_t substituteCost = 2;
+  constexpr uint64_t addCost = 1;
+  constexpr uint64_t deleteCost = 3;
+  constexpr uint64_t switchCaseCost = 0;
+
+  uint64_t len1 = a.size();
+  uint64_t len2 = b.size();
+
+  std::array<std::vector<uint64_t>, 3> C;
+  for (auto& c: C)
+  {
+    c.resize(len2 + 1);
+  }
+  //  int C[3][len2+1];             // cost
+
+  for (uint64_t j = 0; j <= len2; ++j)
+  {
+    C[0][j] = j * addCost;
+  }
+
+  for (uint64_t i = 1; i <= len1; ++i)
+  {
+    uint64_t cur = i % 3;
+    uint64_t prv = (i + 2) % 3;
+    uint64_t pr2 = (i + 1) % 3;
+
+    C[cur][0] = i * deleteCost;
+
+    for (uint64_t j = 1; j <= len2; ++j)
+    {
+      C[cur][j] = 100000000;  // INF
+
+      if (a[i - 1] == b[j - 1]) {
+        // match
+        C[cur][j] = std::min(C[cur][j], C[prv][j - 1]);
+      } else if (tolower(a[i - 1]) == tolower(b[j - 1])) {
+        // switch case
+        C[cur][j] = std::min(C[cur][j], C[prv][j - 1] + switchCaseCost);
+      } else {
+        // substitute
+        C[cur][j] = std::min(C[cur][j], C[prv][j - 1] + substituteCost);
+      }
+
+      // swap
+      if (i >= 2 && j >= 2 && a[i - 1] == b[j - 2] && a[i - 2] == b[j - 1]) {
+        C[cur][j] = std::min(C[cur][j], C[pr2][j - 2] + swapCost);
+      }
+
+      // add
+      C[cur][j] = std::min(C[cur][j], C[cur][j - 1] + addCost);
+
+      // delete
+      C[cur][j] = std::min(C[cur][j], C[prv][j] + deleteCost);
+    }
+  }
+  return C[len1 % 3][len2];
+}
+
+}
+
+std::vector<std::string> DidYouMean::getMatch(const std::string& input)
+{
+  {
+    std::sort(d_words.begin(), d_words.end());
+    auto it = std::unique(d_words.begin(), d_words.end());
+    d_words.erase(it, d_words.end());
+  }
+
+  /** Magic numbers */
+  constexpr uint64_t similarityThreshold = 7;
+  constexpr uint64_t numMatchesThreshold = 10;
+
+  std::vector<std::pair<uint64_t,std::string>> scores;
+  std::vector<std::string> ret;
+  for (const auto& s: d_words) {
+    if (s == input) {
+      // if input matches AS-IS just return that
+      ret.emplace_back(s);
+      return ret;
+    }
+    uint64_t score = 0;
+    if (s.compare(0, input.size(), input) != 0) {
+      score = editDistance(input, s) + 1;
+    }
+    scores.emplace_back(std::make_pair(score, s));
+  }
+  std::sort(scores.begin(), scores.end());
+  const uint64_t min_score = scores.begin()->first;
+  for (const auto& score: scores) {
+    // from here on, matches are not similar enough
+    if (score.first > similarityThreshold) break;
+    // from here on, matches are way worse than the best one
+    if (score.first > min_score + 2) break;
+    // we already have enough matches
+    if (ret.size() >= numMatchesThreshold) break;
+    ret.push_back(score.second);
+  }
+  return ret;
+}
+
+std::string DidYouMean::getMatchAsString(const std::string& input)
+{
+  std::vector<std::string> matches = getMatch(input);
+  std::ostringstream oss;
+  if (matches.size() > 0) {
+    oss << std::endl << std::endl;
+    if (matches.size() == 1) {
+      oss << "Did you mean this?";
+    } else {
+      oss << "Did you mean any of these?";
+    }
+    for (size_t i = 0; i < matches.size(); ++i)
+    {
+      oss << "\n        " << matches[i];
+    }
+  }
+  return oss.str();
+}
+
+}  // namespace cvc5
diff --git a/src/util/didyoumean.h b/src/util/didyoumean.h
new file mode 100644 (file)
index 0000000..26de80f
--- /dev/null
@@ -0,0 +1,50 @@
+/******************************************************************************
+ * Top contributors (to current version):
+ *   Kshitij Bansal, Tim King
+ *
+ * This file is part of the cvc5 project.
+ *
+ * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
+ * in the top-level source directory and their institutional affiliations.
+ * All rights reserved.  See the file COPYING in the top-level source
+ * directory for licensing information.
+ * ****************************************************************************
+ *
+ * Did-you-mean style suggestions.
+ *
+ * ``What do you mean? I don't understand.'' An attempt to be more
+ * helpful than that. Similar to one in git.
+ *
+ * There are no dependencies on cvc5 (except namespace).
+ */
+
+#pragma once
+
+#include "cvc5_export.h"
+
+#include <string>
+#include <vector>
+
+namespace cvc5 {
+
+class CVC5_EXPORT DidYouMean {
+ public:
+  void addWord(const std::string& word) { d_words.emplace_back(word); }
+  void addWords(const std::vector<std::string>& words)
+  {
+    d_words.insert(d_words.end(), words.begin(), words.end());
+  }
+
+  std::vector<std::string> getMatch(const std::string& input);
+
+  /**
+   * This is provided to make it easier to ensure consistency of
+   * output. Returned string is empty if there are no matches.
+   */
+  std::string getMatchAsString(const std::string& input);
+
+ private:
+  std::vector<std::string> d_words;
+};
+
+}  // namespace cvc5
index c1630a203f9136f67ef47f2a85c85d17f37374e4..03348f6e1f2babbbb092f96e0a891af4f7bd7e49 100644 (file)
@@ -23,6 +23,7 @@ cvc5_add_unit_test_black(cardinality_black util)
 cvc5_add_unit_test_white(check_white util)
 cvc5_add_unit_test_black(configuration_black util)
 cvc5_add_unit_test_black(datatype_black util)
+cvc5_add_unit_test_white(didyoumean_black util)
 cvc5_add_unit_test_black(exception_black util)
 cvc5_add_unit_test_black(floatingpoint_black util)
 cvc5_add_unit_test_black(integer_black util)
diff --git a/test/unit/util/didyoumean_black.cpp b/test/unit/util/didyoumean_black.cpp
new file mode 100644 (file)
index 0000000..cc2fdf8
--- /dev/null
@@ -0,0 +1,75 @@
+/******************************************************************************
+ * Top contributors (to current version):
+ *   Aina Niemetz, Andres Noetzli, Gereon Kremer
+ *
+ * This file is part of the cvc5 project.
+ *
+ * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
+ * in the top-level source directory and their institutional affiliations.
+ * All rights reserved.  See the file COPYING in the top-level source
+ * directory for licensing information.
+ * ****************************************************************************
+ *
+ * Black box testing of cvc5::Stat and associated classes.
+ */
+
+#include "test.h"
+
+#include "util/didyoumean.h"
+
+namespace cvc5::test {
+
+class TestUtilDidYouMean : public TestInternal
+{
+};
+
+TEST_F(TestUtilDidYouMean, getMatch)
+{
+  {
+    DidYouMean dym;
+    dym.addWords({"abfish", "cdfish", "whale"});
+    {
+      auto expected = std::vector<std::string>{"abfish"};
+      EXPECT_EQ(dym.getMatch("abfish"), expected);
+    }
+    {
+      auto expected = std::vector<std::string>{"whale"};
+      EXPECT_EQ(dym.getMatch("wahl"), expected);
+    }
+    {
+      auto expected = std::vector<std::string>{"abfish", "cdfish"};
+      EXPECT_EQ(dym.getMatch("fish"), expected);
+    }
+    {
+      auto expected = std::vector<std::string>{};
+      EXPECT_EQ(dym.getMatch("elephant"), expected);
+    }
+  }
+}
+
+TEST_F(TestUtilDidYouMean, getMatchAsString)
+{
+    DidYouMean dym;
+    dym.addWords({"abfish", "cdfish", "whale"});
+    {
+      std::string expected = "";
+      EXPECT_EQ(dym.getMatchAsString("elephant"), expected);
+    }
+    {
+      std::string expected = R"FOOBAR(
+
+Did you mean this?
+        whale)FOOBAR";
+      EXPECT_EQ(dym.getMatchAsString("wahl"), expected);
+    }
+    {
+      std::string expected = R"FOOBAR(
+
+Did you mean any of these?
+        abfish
+        cdfish)FOOBAR";
+      EXPECT_EQ(dym.getMatchAsString("fish"), expected);
+    }
+}
+
+}  // namespace cvc5::test