From: David Malcolm Date: Fri, 14 Sep 2018 22:02:58 +0000 (+0000) Subject: Fix overeager spelling corrections (PR c/82967) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=07f879058339c496273c9960562c3fdad317001a;p=gcc.git Fix overeager spelling corrections (PR c/82967) This patch tunes class best_match's cutoff for rejecting meaningless spelling suggestions. Previously, we allowed an edit distance of up to half of the length of the longer of the goal string and closest candidate strings, rounded down. With this patch, we now allow only up to a third - with some tuning of rounding (and for very short strings), to ensure that: (a) everything that worked before still works (with the removal of a couple of cases that shouldn't), and that (b) the new threshold is always at least as conservative as the old threshold and thus shouldn't offer new nonsensical suggestions (with the possible exception of cases where transposition has helped; see r261521 aka Damerau-Levenshtein; PR other/69968). In particular, all of the bogus suggestions from PR c/82967 are now no longer offered. gcc/ChangeLog: PR c/82967 * spellcheck.c (get_edit_distance_cutoff): New function. (selftest::test_edit_distance_unit_test_oneway): Rename to... (selftest::test_get_edit_distance_one_way): ...this. (selftest::test_get_edit_distance_unit): Rename to... (selftest::test_get_edit_distance_both_ways): ...this. (selftest::test_edit_distances): Move tests to this new function, and test some more pairs of strings. Update for above renaming. (selftest::get_old_cutoff): New function. (selftest::test_get_edit_distance_cutoff): New function. (selftest::assert_suggested_for): New function. (ASSERT_SUGGESTED_FOR): New macro. (selftest::assert_not_suggested_for): New function. (ASSERT_NOT_SUGGESTED_FOR): New macro. (selftest::test_suggestions): New function. (selftest::spellcheck_c_tests): Move test_get_edit_distance_unit tests to selftest::test_edit_distances and call it. Add calls to selftest::test_get_edit_distance_cutoff and selftest::test_suggestions. * spellcheck.h (get_edit_distance_cutoff): New function declaration. (best_match::consider): Replace hard-coded cutoff calculation with a call to... (best_match::get_cutoff): New declaration. (best_match::get_best_meaningful_candidate): Likewise. gcc/testsuite/ChangeLog: PR c/82967 * c-c++-common/attributes-1.c: Remove bogus suggestion from dg-prune-output. * gcc.dg/diagnostic-token-ranges.c (undeclared_identifier): Remove bogus suggestion. * gcc.dg/spellcheck-identifiers-4.c: New test. From-SVN: r264335 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 76619e9fc50..a2088aa9587 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,30 @@ +2018-09-14 David Malcolm + + PR c/82967 + * spellcheck.c (get_edit_distance_cutoff): New function. + (selftest::test_edit_distance_unit_test_oneway): Rename to... + (selftest::test_get_edit_distance_one_way): ...this. + (selftest::test_get_edit_distance_unit): Rename to... + (selftest::test_get_edit_distance_both_ways): ...this. + (selftest::test_edit_distances): Move tests to this new function, + and test some more pairs of strings. Update for above renaming. + (selftest::get_old_cutoff): New function. + (selftest::test_get_edit_distance_cutoff): New function. + (selftest::assert_suggested_for): New function. + (ASSERT_SUGGESTED_FOR): New macro. + (selftest::assert_not_suggested_for): New function. + (ASSERT_NOT_SUGGESTED_FOR): New macro. + (selftest::test_suggestions): New function. + (selftest::spellcheck_c_tests): Move test_get_edit_distance_unit + tests to selftest::test_edit_distances and call it. Add calls to + selftest::test_get_edit_distance_cutoff and + selftest::test_suggestions. + * spellcheck.h (get_edit_distance_cutoff): New function declaration. + (best_match::consider): Replace hard-coded cutoff calculation with + a call to... + (best_match::get_cutoff): New declaration. + (best_match::get_best_meaningful_candidate): Likewise. + 2018-09-14 Bernd Edlinger * builtins.c (fold_builtin_strlen): Remove TODO comment. diff --git a/gcc/spellcheck.c b/gcc/spellcheck.c index e2a8b92fdec..690e6fa674f 100644 --- a/gcc/spellcheck.c +++ b/gcc/spellcheck.c @@ -162,6 +162,36 @@ find_closest_string (const char *target, return bm.get_best_meaningful_candidate (); } +/* Generate the maximum edit distance for which we consider a suggestion + to be meaningful, given a goal of length GOAL_LEN and a candidate of + length CANDIDATE_LEN. + + This is a third of the the length of the candidate or of the goal, + whichever is bigger. */ + +edit_distance_t +get_edit_distance_cutoff (size_t goal_len, size_t candidate_len) +{ + size_t max_length = MAX (goal_len, candidate_len); + size_t min_length = MIN (goal_len, candidate_len); + + gcc_assert (max_length >= min_length); + + /* Special case: don't offer suggestions for a pair of + length == 1 strings (or empty strings). */ + if (max_length <= 1) + return 0; + + /* If the lengths are close, then round down. */ + if (max_length - min_length <= 1) + /* ...but allow an edit distance of at least 1. */ + return MAX (max_length / 3, 1); + + /* Otherwise, round up (thus giving a little extra leeway to some cases + involving insertions/deletions). */ + return (max_length + 2) / 3; +} + #if CHECKING_P namespace selftest { @@ -171,8 +201,8 @@ namespace selftest { /* Verify that get_edit_distance (A, B) equals the expected value. */ static void -test_edit_distance_unit_test_oneway (const char *a, const char *b, - edit_distance_t expected) +test_get_edit_distance_one_way (const char *a, const char *b, + edit_distance_t expected) { edit_distance_t actual = get_edit_distance (a, b); ASSERT_EQ (actual, expected); @@ -185,11 +215,169 @@ test_edit_distance_unit_test_oneway (const char *a, const char *b, equal the expected value, to ensure that the function is symmetric. */ static void -test_get_edit_distance_unit (const char *a, const char *b, +test_get_edit_distance_both_ways (const char *a, const char *b, edit_distance_t expected) { - test_edit_distance_unit_test_oneway (a, b, expected); - test_edit_distance_unit_test_oneway (b, a, expected); + test_get_edit_distance_one_way (a, b, expected); + test_get_edit_distance_one_way (b, a, expected); +} + +/* Verify get_edit_distance for a variety of pairs of pre-canned + inputs, comparing against known-good values. */ + +static void +test_edit_distances () +{ + test_get_edit_distance_both_ways ("", "nonempty", strlen ("nonempty")); + test_get_edit_distance_both_ways ("saturday", "sunday", 3); + test_get_edit_distance_both_ways ("foo", "m_foo", 2); + test_get_edit_distance_both_ways ("hello_world", "HelloWorld", 3); + test_get_edit_distance_both_ways + ("the quick brown fox jumps over the lazy dog", "dog", 40); + test_get_edit_distance_both_ways + ("the quick brown fox jumps over the lazy dog", + "the quick brown dog jumps over the lazy fox", + 4); + test_get_edit_distance_both_ways + ("Lorem ipsum dolor sit amet, consectetur adipiscing elit,", + "All your base are belong to us", + 44); + test_get_edit_distance_both_ways ("foo", "FOO", 3); + test_get_edit_distance_both_ways ("fee", "deed", 2); + test_get_edit_distance_both_ways ("coorzd1", "coordx1", 2); + test_get_edit_distance_both_ways ("assert", "sqrt", 3); + test_get_edit_distance_both_ways ("PATH_MAX", "INT8_MAX", 3); + test_get_edit_distance_both_ways ("time", "nice", 2); + test_get_edit_distance_both_ways ("bar", "carg", 2); + test_get_edit_distance_both_ways ("gtk_widget_show_all", + "GtkWidgetShowAll", 7); + test_get_edit_distance_both_ways ("m_bar", "bar", 2); + test_get_edit_distance_both_ways ("MACRO", "MACRAME", 3); + test_get_edit_distance_both_ways ("ab", "ac", 1); + test_get_edit_distance_both_ways ("ab", "a", 1); + test_get_edit_distance_both_ways ("a", "b", 1); + test_get_edit_distance_both_ways ("nanl", "name", 2); + test_get_edit_distance_both_ways ("char", "bar", 2); + test_get_edit_distance_both_ways ("-optimize", "fsanitize", 5); + test_get_edit_distance_both_ways ("__DATE__", "__i386__", 4); + + /* Examples where transposition helps. */ + test_get_edit_distance_both_ways ("ab", "ba", 1); + test_get_edit_distance_both_ways ("ba", "abc", 2); + test_get_edit_distance_both_ways ("coorzd1", "coordz1", 1); + test_get_edit_distance_both_ways ("abcdefghijklmnopqrstuvwxyz", + "bacdefghijklmnopqrstuvwxzy", 2); + test_get_edit_distance_both_ways ("saturday", "sundya", 4); + test_get_edit_distance_both_ways ("signed", "singed", 1); +} + +/* Subroutine of test_get_edit_distance_cutoff, for emulating the + spellchecking cutoff in up to GCC 8. */ + +static edit_distance_t +get_old_cutoff (size_t goal_len, size_t candidate_len) +{ + return MAX (goal_len, candidate_len) / 2; +} + +/* Verify that the cutoff for "meaningfulness" of suggestions is at least as + conservative as in older GCC releases. + + This should ensure that we don't offer additional meaningless + suggestions (apart from those for which transposition has helped). */ + +static void +test_get_edit_distance_cutoff () +{ + for (size_t goal_len = 0; goal_len < 30; goal_len++) + for (size_t candidate_len = 0; candidate_len < 30; candidate_len++) + ASSERT_TRUE (get_edit_distance_cutoff (goal_len, candidate_len) + <= get_old_cutoff (goal_len, candidate_len)); +} + +/* Assert that CANDIDATE is offered as a suggestion for TARGET. */ + +static void +assert_suggested_for (const location &loc, const char *candidate, + const char *target) +{ + auto_vec candidates; + candidates.safe_push (candidate); + ASSERT_EQ_AT (loc, candidate, find_closest_string (target, &candidates)); +} + +/* Assert that CANDIDATE is offered as a suggestion for TARGET. */ + +#define ASSERT_SUGGESTED_FOR(CANDIDATE, TARGET) \ + SELFTEST_BEGIN_STMT \ + assert_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET); \ + SELFTEST_END_STMT + +/* Assert that CANDIDATE is not offered as a suggestion for TARGET. */ + +static void +assert_not_suggested_for (const location &loc, const char *candidate, + const char *target) +{ + auto_vec candidates; + candidates.safe_push (candidate); + ASSERT_EQ_AT (loc, NULL, find_closest_string (target, &candidates)); +} + +/* Assert that CANDIDATE is not offered as a suggestion for TARGET. */ + +#define ASSERT_NOT_SUGGESTED_FOR(CANDIDATE, TARGET) \ + SELFTEST_BEGIN_STMT \ + assert_not_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET); \ + SELFTEST_END_STMT + +/* Verify that we offer varous suggestions that are meaningful, + and that we don't offer various other ones that aren't (PR c/82967). */ + +static void +test_suggestions () +{ + /* Good suggestions. */ + + ASSERT_SUGGESTED_FOR ("m_bar", "bar"); + // dist == 2, max_length == 5, min_length == 3 + + ASSERT_SUGGESTED_FOR ("MACRO", "MACRAME"); + // dist == 3, max_length == 7, min_length == 5 + + ASSERT_SUGGESTED_FOR ("gtk_widget_show_all", "GtkWidgetShowAll"); + // dist == 7, max_length == 16, min_length = 19 + + ASSERT_SUGGESTED_FOR ("ab", "ac"); + // dist == 1, max_length == min_length = 2 + + ASSERT_SUGGESTED_FOR ("ab", "a"); + // dist == 1, max_length == 2, min_length = 1 + + /* Bad suggestions. */ + + ASSERT_NOT_SUGGESTED_FOR ("a", "b"); + // dist == 1, max_length == min_length = 1 + + ASSERT_NOT_SUGGESTED_FOR ("sqrt", "assert"); + // dist == 3, max_length 6, min_length == 4 + + ASSERT_NOT_SUGGESTED_FOR ("INT8_MAX", "PATH_MAX"); + // dist == 3, max_length == min_length == 8 + + ASSERT_NOT_SUGGESTED_FOR ("nice", "time"); + ASSERT_NOT_SUGGESTED_FOR ("nanl", "name"); + // dist == 2, max_length == min_length == 4 + + ASSERT_NOT_SUGGESTED_FOR ("carg", "bar"); + ASSERT_NOT_SUGGESTED_FOR ("char", "bar"); + // dist == 2, max_length == 4, min_length == 3 + + ASSERT_NOT_SUGGESTED_FOR ("-optimize", "fsanitize"); + // dist == 5, max_length == min_length == 9 + + ASSERT_NOT_SUGGESTED_FOR ("__DATE__", "__i386__"); + // dist == 4, max_length == min_length == 8 } /* Verify that find_closest_string is sane. */ @@ -291,39 +479,14 @@ test_metric_conditions () } } -/* Verify get_edit_distance for a variety of pairs of pre-canned - inputs, comparing against known-good values. */ +/* Run all of the selftests within this file. */ void spellcheck_c_tests () { - test_get_edit_distance_unit ("", "nonempty", strlen ("nonempty")); - test_get_edit_distance_unit ("saturday", "sunday", 3); - test_get_edit_distance_unit ("foo", "m_foo", 2); - test_get_edit_distance_unit ("hello_world", "HelloWorld", 3); - test_get_edit_distance_unit - ("the quick brown fox jumps over the lazy dog", "dog", 40); - test_get_edit_distance_unit - ("the quick brown fox jumps over the lazy dog", - "the quick brown dog jumps over the lazy fox", - 4); - test_get_edit_distance_unit - ("Lorem ipsum dolor sit amet, consectetur adipiscing elit,", - "All your base are belong to us", - 44); - test_get_edit_distance_unit ("foo", "FOO", 3); - test_get_edit_distance_unit ("fee", "deed", 2); - test_get_edit_distance_unit ("coorzd1", "coordx1", 2); - - /* Examples where transposition helps. */ - test_get_edit_distance_unit ("ab", "ba", 1); - test_get_edit_distance_unit ("ba", "abc", 2); - test_get_edit_distance_unit ("coorzd1", "coordz1", 1); - test_get_edit_distance_unit ("abcdefghijklmnopqrstuvwxyz", - "bacdefghijklmnopqrstuvwxzy", 2); - test_get_edit_distance_unit ("saturday", "sundya", 4); - test_get_edit_distance_unit ("signed", "singed", 1); - + test_edit_distances (); + test_get_edit_distance_cutoff (); + test_suggestions (); test_find_closest_string (); test_metric_conditions (); } diff --git a/gcc/spellcheck.h b/gcc/spellcheck.h index 41ac16f0252..e8fa77ceaa8 100644 --- a/gcc/spellcheck.h +++ b/gcc/spellcheck.h @@ -66,6 +66,9 @@ struct edit_distance_traits } }; +extern edit_distance_t get_edit_distance_cutoff (size_t goal_len, + size_t candidate_len); + /* A type for use when determining the best match against a string, expressed as a template so that we can match against various string-like types (const char *, frontend identifiers, and preprocessor @@ -119,7 +122,7 @@ class best_match /* If the candidate will be unable to beat the criterion in get_best_meaningful_candidate, reject it without computing the exact distance. */ - unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2; + edit_distance_t cutoff = get_cutoff (candidate_len); if (min_candidate_distance > cutoff) return; @@ -151,17 +154,25 @@ class best_match m_best_candidate_len = best_candidate_len; } + /* Generate the maximum edit distance for which we consider a suggestion + to be meaningful, given a candidate of length CANDIDATE_LEN. */ + + edit_distance_t get_cutoff (size_t candidate_len) const + { + return ::get_edit_distance_cutoff (m_goal_len, candidate_len); + } + /* Get the best candidate so far, but applying a filter to ensure that we return NULL if none of the candidates are close to the goal, to avoid offering nonsensical suggestions to the user. */ candidate_t get_best_meaningful_candidate () const { - /* If more than half of the letters were misspelled, the suggestion is - likely to be meaningless. */ + /* If the edit distance is too high, the suggestion is likely to be + meaningless. */ if (m_best_candidate) { - unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2; + edit_distance_t cutoff = get_cutoff (m_best_candidate_len); if (m_best_distance > cutoff) return NULL; } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 43ce2d964fa..25c253fe9af 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2018-09-14 David Malcolm + + PR c/82967 + * c-c++-common/attributes-1.c: Remove bogus suggestion from + dg-prune-output. + * gcc.dg/diagnostic-token-ranges.c (undeclared_identifier): Remove + bogus suggestion. + * gcc.dg/spellcheck-identifiers-4.c: New test. + 2018-09-14 Bernd Edlinger * gcc.dg/warn-strlen-no-nul.c: Add some missing test cases. diff --git a/gcc/testsuite/c-c++-common/attributes-1.c b/gcc/testsuite/c-c++-common/attributes-1.c index c76339ff680..1657da10d5e 100644 --- a/gcc/testsuite/c-c++-common/attributes-1.c +++ b/gcc/testsuite/c-c++-common/attributes-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-prune-output "undeclared here \\(not in a function\\); did you mean .char..|\[^\n\r\]* was not declared in this scope" } */ +/* { dg-prune-output "undeclared here \\(not in a function\\)|\[^\n\r\]* was not declared in this scope" } */ void* my_calloc(unsigned, unsigned) __attribute__((alloc_size(1,bar))); /* { dg-warning "outside range" } */ void* my_realloc(void*, unsigned) __attribute__((alloc_size(bar))); /* { dg-warning "outside range" } */ diff --git a/gcc/testsuite/gcc.dg/diagnostic-token-ranges.c b/gcc/testsuite/gcc.dg/diagnostic-token-ranges.c index 2ef7a01c48e..406bd323cc8 100644 --- a/gcc/testsuite/gcc.dg/diagnostic-token-ranges.c +++ b/gcc/testsuite/gcc.dg/diagnostic-token-ranges.c @@ -8,12 +8,11 @@ long double nanl (const char *); void undeclared_identifier (void) { - name; /* { dg-error "'name' undeclared .first use in this function.; did you mean .nanl." } */ + name; /* { dg-error "'name' undeclared" } */ /* { dg-begin-multiline-output "" } name; ^~~~ - nanl { dg-end-multiline-output "" } */ } diff --git a/gcc/testsuite/gcc.dg/spellcheck-identifiers-4.c b/gcc/testsuite/gcc.dg/spellcheck-identifiers-4.c new file mode 100644 index 00000000000..f9b7d8d0cab --- /dev/null +++ b/gcc/testsuite/gcc.dg/spellcheck-identifiers-4.c @@ -0,0 +1,10 @@ +/* { dg-options "-Wimplicit-function-declaration" } */ + +extern double sqrt (double); + +void test (float pf, float inff) +{ + assert (pf == inff); /* { dg-bogus "sqrt" } */ + /* { dg-warning "implicit declaration of function 'assert'" "" { target *-*-* } .-1 } */ + /* { dg-message "header ''" "" { target *-*-* } .-2 } */ +}