Make strcmp_iw NOT ignore whitespace in the middle of tokens
author Pedro Alves <palves@redhat.com>
Fri, 24 Nov 2017 23:30:04 +0000 (23:30 +0000)
committer Pedro Alves <palves@redhat.com>
Fri, 24 Nov 2017 23:30:04 +0000 (23:30 +0000)
Currently, "b func tion" manages to set a breakpoint at "function"!

All these years I had never noticed this, but now that the linespec
completer actually works, this easily happens by accident, with:

  "b func t<tab>"

expecting to get "thread", but getting instead:

  "b func tion"

...

Also, this:

  "b rettypefunc<int>"

manages to set a breakpoint on "rettype func<int>()".

These things happen due to strcmp_iw "magic".

Fix it by teaching strcmp_iw about when it can skip whitespace.  This
required handling user-defined operators, and scope operators,
complicating the code a bit, unfortunately.  I added unit tests for
all the corner cases I stumbled on, as I was developing this, and then
in the end wrote a testsuite testcase covering many of the same things
and more (to be added later).

gdb/ChangeLog:
2017-11-24  Pedro Alves  <palves@redhat.com>

* cp-support.c (cp_symbol_name_matches_1): New, factored out from
cp_fq_symbol_name_matches.  Pass language_cplus to
strncmp_iw_with_mode.
(cp_fq_symbol_name_matches): Call cp_symbol_name_matches_1.
(selftests::test_cp_symbol_name_matches): New.
(_initialize_cp_support): Register "cp_symbol_name_matches"
selftests.
* language.c (default_symbol_name_matcher): Pass language_minimal
to strncmp_iw_with_mode.
* utils.c: Include "cp-support.h" and <algorithm>.
(valid_identifier_name_char, cp_skip_operator_token, skip_ws)
(cp_is_operator): New functions.
(strncmp_iw_with_mode): Use them.  Add language parameter.  Don't
skip whitespace in the symbol name when the lookup name doesn't
have spaces, and vice versa.
(strncmp_iw, strcmp_iw): Pass language to strncmp_iw_with_mode.
* utils.h (strncmp_iw_with_mode): Add language parameter.

gdb/ChangeLog
gdb/cp-support.c
gdb/language.c
gdb/utils.c
gdb/utils.h

index 26d5cd3d46c8e7e9dcdd7a547b4c930d06134b71..befce60ce60908a9c7f936b7d6063fe1e2fb2bfb 100644 (file)
@@ -1,3 +1,23 @@
+2017-11-24  Pedro Alves  <palves@redhat.com>
+
+       * cp-support.c (cp_symbol_name_matches_1): New, factored out from
+       cp_fq_symbol_name_matches.  Pass language_cplus to
+       strncmp_iw_with_mode.
+       (cp_fq_symbol_name_matches): Call cp_symbol_name_matches_1.
+       (selftests::test_cp_symbol_name_matches): New.
+       (_initialize_cp_support): Register "cp_symbol_name_matches"
+       selftests.
+       * language.c (default_symbol_name_matcher): Pass language_minimal
+       to strncmp_iw_with_mode.
+       * utils.c: Include "cp-support.h" and <algorithm>.
+       (valid_identifier_name_char, cp_skip_operator_token, skip_ws)
+       (cp_is_operator): New functions.
+       (strncmp_iw_with_mode): Use them.  Add language parameter.  Don't
+       skip whitespace in the symbol name when the lookup name doesn't
+       have spaces, and vice versa.
+       (strncmp_iw, strcmp_iw): Pass language to strncmp_iw_with_mode.
+       * utils.h (strncmp_iw_with_mode): Add language parameter.
+
 2017-11-24  Joel Brobecker  <brobecker@adacore.com>
 
        * ada-lang.c (ada_exception_message_1, ada_exception_message):
index 1cab69b300fe8e7a617086e339df48ff5d9e2a71..368112a529434e3850e408047d011cba699b7846 100644 (file)
@@ -1617,6 +1617,39 @@ gdb_sniff_from_mangled_name (const char *mangled, char **demangled)
 
 /* C++ symbol_name_matcher_ftype implementation.  */
 
+/* Helper for cp_fq_symbol_name_matches (i.e.,
+   symbol_name_matcher_ftype implementation).  Split to a separate
+   function for unit-testing convenience.
+
+   See symbol_name_matcher_ftype for description of SYMBOL_SEARCH_NAME
+   and COMP_MATCH_RES.
+
+   LOOKUP_NAME/LOOKUP_NAME_LEN is the name we're looking up.
+
+   See strncmp_iw_with_mode for description of MODE.
+*/
+
+static bool
+cp_symbol_name_matches_1 (const char *symbol_search_name,
+                         const char *lookup_name,
+                         size_t lookup_name_len,
+                         strncmp_iw_mode mode,
+                         completion_match *match)
+{
+  if (strncmp_iw_with_mode (symbol_search_name,
+                           lookup_name, lookup_name_len,
+                           mode, language_cplus) == 0)
+    {
+      if (match != NULL)
+       match->set_match (symbol_search_name);
+      return true;
+    }
+
+  return false;
+}
+
+/* C++ symbol_name_matcher_ftype implementation.  */
+
 static bool
 cp_fq_symbol_name_matches (const char *symbol_search_name,
                           const lookup_name_info &lookup_name,
@@ -1629,16 +1662,9 @@ cp_fq_symbol_name_matches (const char *symbol_search_name,
                          ? strncmp_iw_mode::NORMAL
                          : strncmp_iw_mode::MATCH_PARAMS);
 
-  if (strncmp_iw_with_mode (symbol_search_name,
-                           name.c_str (), name.size (),
-                           mode) == 0)
-    {
-      if (match != NULL)
-       match->set_match (symbol_search_name);
-      return true;
-    }
-
-  return false;
+  return cp_symbol_name_matches_1 (symbol_search_name,
+                                  name.c_str (), name.size (),
+                                  mode, match);
 }
 
 /* See cp-support.h.  */
@@ -1653,6 +1679,136 @@ cp_get_symbol_name_matcher (const lookup_name_info &lookup_name)
 
 namespace selftests {
 
+void
+test_cp_symbol_name_matches ()
+{
+#define CHECK_MATCH(SYMBOL, INPUT)                                     \
+  SELF_CHECK (cp_symbol_name_matches_1 (SYMBOL,                                \
+                                       INPUT, sizeof (INPUT) - 1,      \
+                                       strncmp_iw_mode::MATCH_PARAMS,  \
+                                       NULL))
+
+#define CHECK_NOT_MATCH(SYMBOL, INPUT)                                 \
+  SELF_CHECK (!cp_symbol_name_matches_1 (SYMBOL,                       \
+                                        INPUT, sizeof (INPUT) - 1,     \
+                                        strncmp_iw_mode::MATCH_PARAMS, \
+                                        NULL))
+
+  /* Like CHECK_MATCH, and also check that INPUT (and all substrings
+     that start at index 0) completes to SYMBOL.  */
+#define CHECK_MATCH_C(SYMBOL, INPUT)                                   \
+  do                                                                   \
+    {                                                                  \
+      CHECK_MATCH (SYMBOL, INPUT);                                     \
+      for (size_t i = 0; i < sizeof (INPUT) - 1; i++)                  \
+       SELF_CHECK (cp_symbol_name_matches_1 (SYMBOL, INPUT, i,         \
+                                             strncmp_iw_mode::NORMAL,  \
+                                             NULL));                   \
+    } while (0)
+
+  /* Like CHECK_NOT_MATCH, and also check that INPUT does NOT complete
+     to SYMBOL.  */
+#define CHECK_NOT_MATCH_C(SYMBOL, INPUT)                               \
+  do                                                                   \
+    {                                                                  \
+      CHECK_NOT_MATCH (SYMBOL, INPUT);                                 \
+      SELF_CHECK (!cp_symbol_name_matches_1 (SYMBOL, INPUT,            \
+                                            sizeof (INPUT) - 1,        \
+                                            strncmp_iw_mode::NORMAL,   \
+                                            NULL));                    \
+    } while (0)
+
+  /* Lookup name without parens matches all overloads.  */
+  CHECK_MATCH_C ("function()", "function");
+  CHECK_MATCH_C ("function(int)", "function");
+
+  /* Check whitespace around parameters is ignored.  */
+  CHECK_MATCH_C ("function()", "function ()");
+  CHECK_MATCH_C ("function ( )", "function()");
+  CHECK_MATCH_C ("function ()", "function( )");
+  CHECK_MATCH_C ("func(int)", "func( int )");
+  CHECK_MATCH_C ("func(int)", "func ( int ) ");
+  CHECK_MATCH_C ("func ( int )", "func( int )");
+  CHECK_MATCH_C ("func ( int )", "func ( int ) ");
+
+  /* Check symbol name prefixes aren't incorrectly matched.  */
+  CHECK_NOT_MATCH ("func", "function");
+  CHECK_NOT_MATCH ("function", "func");
+  CHECK_NOT_MATCH ("function()", "func");
+
+  /* Check that if the lookup name includes parameters, only the right
+     overload matches.  */
+  CHECK_MATCH_C ("function(int)", "function(int)");
+  CHECK_NOT_MATCH_C ("function(int)", "function()");
+
+  /* Check that whitespace within symbol names is not ignored.  */
+  CHECK_NOT_MATCH_C ("function", "func tion");
+  CHECK_NOT_MATCH_C ("func__tion", "func_ _tion");
+  CHECK_NOT_MATCH_C ("func11tion", "func1 1tion");
+
+  /* Check the converse, which can happen with template function,
+     where the return type is part of the demangled name.  */
+  CHECK_NOT_MATCH_C ("func tion", "function");
+  CHECK_NOT_MATCH_C ("func1 1tion", "func11tion");
+  CHECK_NOT_MATCH_C ("func_ _tion", "func__tion");
+
+  /* Within parameters too.  */
+  CHECK_NOT_MATCH_C ("func(param)", "func(par am)");
+
+  /* Check handling of whitespace around C++ operators.  */
+  CHECK_NOT_MATCH_C ("operator<<", "opera tor<<");
+  CHECK_NOT_MATCH_C ("operator<<", "operator< <");
+  CHECK_NOT_MATCH_C ("operator<<", "operator < <");
+  CHECK_NOT_MATCH_C ("operator==", "operator= =");
+  CHECK_NOT_MATCH_C ("operator==", "operator = =");
+  CHECK_MATCH_C ("operator<<", "operator <<");
+  CHECK_MATCH_C ("operator<<()", "operator <<");
+  CHECK_NOT_MATCH_C ("operator<<()", "operator<<(int)");
+  CHECK_NOT_MATCH_C ("operator<<(int)", "operator<<()");
+  CHECK_MATCH_C ("operator==", "operator ==");
+  CHECK_MATCH_C ("operator==()", "operator ==");
+  CHECK_MATCH_C ("operator <<", "operator<<");
+  CHECK_MATCH_C ("operator ==", "operator==");
+  CHECK_MATCH_C ("operator bool", "operator  bool");
+  CHECK_MATCH_C ("operator bool ()", "operator  bool");
+  CHECK_MATCH_C ("operatorX<<", "operatorX < <");
+  CHECK_MATCH_C ("Xoperator<<", "Xoperator < <");
+
+  CHECK_MATCH_C ("operator()(int)", "operator()(int)");
+  CHECK_MATCH_C ("operator()(int)", "operator ( ) ( int )");
+  CHECK_MATCH_C ("operator()<long>(int)", "operator ( ) < long > ( int )");
+  /* The first "()" is not the parameter list.  */
+  CHECK_NOT_MATCH ("operator()(int)", "operator");
+
+  /* Misc user-defined operator tests.  */
+
+  CHECK_NOT_MATCH_C ("operator/=()", "operator ^=");
+  /* Same length at end of input.  */
+  CHECK_NOT_MATCH_C ("operator>>", "operator[]");
+  /* Same length but not at end of input.  */
+  CHECK_NOT_MATCH_C ("operator>>()", "operator[]()");
+
+  CHECK_MATCH_C ("base::operator char*()", "base::operator char*()");
+  CHECK_MATCH_C ("base::operator char*()", "base::operator char * ()");
+  CHECK_MATCH_C ("base::operator char**()", "base::operator char * * ()");
+  CHECK_MATCH ("base::operator char**()", "base::operator char * *");
+  CHECK_MATCH_C ("base::operator*()", "base::operator*()");
+  CHECK_NOT_MATCH_C ("base::operator char*()", "base::operatorc");
+  CHECK_NOT_MATCH ("base::operator char*()", "base::operator char");
+  CHECK_NOT_MATCH ("base::operator char*()", "base::operat");
+
+  /* Check handling of whitespace around C++ scope operators.  */
+  CHECK_NOT_MATCH_C ("foo::bar", "foo: :bar");
+  CHECK_MATCH_C ("foo::bar", "foo :: bar");
+  CHECK_MATCH_C ("foo :: bar", "foo::bar");
+
+  CHECK_MATCH_C ("abc::def::ghi()", "abc::def::ghi()");
+  CHECK_MATCH_C ("abc::def::ghi ( )", "abc::def::ghi()");
+  CHECK_MATCH_C ("abc::def::ghi()", "abc::def::ghi ( )");
+  CHECK_MATCH_C ("function()", "function()");
+  CHECK_MATCH_C ("bar::function()", "bar::function()");
+}
+
 /* If non-NULL, return STR wrapped in quotes.  Otherwise, return a
    "<null>" string (with no quotes).  */
 
@@ -1856,6 +2012,8 @@ display the offending symbol."),
 #endif
 
 #if GDB_SELF_TEST
+  selftests::register_test ("cp_symbol_name_matches",
+                           selftests::test_cp_symbol_name_matches);
   selftests::register_test ("cp_remove_params",
                            selftests::test_cp_remove_params);
 #endif
index 76047c71861323c77101a570765302a7193d9396..2a1419cd3006139b81c52237bb8c93da00e50c7a 100644 (file)
@@ -713,7 +713,7 @@ default_symbol_name_matcher (const char *symbol_search_name,
                          : strncmp_iw_mode::MATCH_PARAMS);
 
   if (strncmp_iw_with_mode (symbol_search_name, name.c_str (), name.size (),
-                           mode) == 0)
+                           mode, language_minimal) == 0)
     {
       if (match != NULL)
        match->set_match (symbol_search_name);
index b5c011bcdc07a95d570e47de8f6e776ec19748b1..3e817edaebf1db1cb340fe13dd29a8f7c8a82f95 100644 (file)
@@ -68,6 +68,8 @@
 #include "job-control.h"
 #include "common/selftest.h"
 #include "common/gdb_optional.h"
+#include "cp-support.h"
+#include <algorithm>
 
 #if !HAVE_DECL_MALLOC
 extern PTR malloc ();          /* ARI: PTR */
@@ -2156,22 +2158,233 @@ fprintf_symbol_filtered (struct ui_file *stream, const char *name,
     }
 }
 
+/* True if CH is a character that can be part of a symbol name.  I.e.,
+   either a number, a letter, or a '_'.  */
+
+static bool
+valid_identifier_name_char (int ch)
+{
+  return (isalnum (ch) || ch == '_');
+}
+
+/* Skip to end of token, or to END, whatever comes first.  Input is
+   assumed to be a C++ operator name.  */
+
+static const char *
+cp_skip_operator_token (const char *token, const char *end)
+{
+  const char *p = token;
+  while (p != end && !isspace (*p) && *p != '(')
+    {
+      if (valid_identifier_name_char (*p))
+       {
+         while (p != end && valid_identifier_name_char (*p))
+           p++;
+         return p;
+       }
+      else
+       {
+         /* Note, ordered such that among ops that share a prefix,
+            longer comes first.  This is so that the loop below can
+            bail on first match.  */
+         static const char *ops[] =
+           {
+             "[",
+             "]",
+             "~",
+             ",",
+             "-=", "--", "->", "-",
+             "+=", "++", "+",
+             "*=", "*",
+             "/=", "/",
+             "%=", "%",
+             "|=", "||", "|",
+             "&=", "&&", "&",
+             "^=", "^",
+             "!=", "!",
+             "<<=", "<=", "<<", "<",
+             ">>=", ">=", ">>", ">",
+             "==", "=",
+           };
+
+         for (const char *op : ops)
+           {
+             size_t oplen = strlen (op);
+             size_t lencmp = std::min<size_t> (oplen, end - p);
+
+             if (strncmp (p, op, lencmp) == 0)
+               return p + lencmp;
+           }
+         /* Some unidentified character.  Return it.  */
+         return p + 1;
+       }
+    }
+
+  return p;
+}
+
+/* Advance STRING1/STRING2 past whitespace.  */
+
+static void
+skip_ws (const char *&string1, const char *&string2, const char *end_str2)
+{
+  while (isspace (*string1))
+    string1++;
+  while (string2 < end_str2 && isspace (*string2))
+    string2++;
+}
+
+/* True if STRING points at the start of a C++ operator name.  START
+   is the start of the string that STRING points to, hence when
+   reading backwards, we must not read any character before START.  */
+
+static bool
+cp_is_operator (const char *string, const char *start)
+{
+  return ((string == start
+          || !valid_identifier_name_char (string[-1]))
+         && strncmp (string, CP_OPERATOR_STR, CP_OPERATOR_LEN) == 0
+         && !valid_identifier_name_char (string[CP_OPERATOR_LEN]));
+}
+
 /* See utils.h.  */
 
 int
 strncmp_iw_with_mode (const char *string1, const char *string2,
-                     size_t string2_len, strncmp_iw_mode mode)
+                     size_t string2_len, strncmp_iw_mode mode,
+                     enum language language)
 {
+  const char *string1_start = string1;
   const char *end_str2 = string2 + string2_len;
+  bool skip_spaces = true;
+  bool have_colon_op = (language == language_cplus
+                       || language == language_rust
+                       || language == language_fortran);
 
   while (1)
     {
-      while (isspace (*string1))
-       string1++;
-      while (string2 < end_str2 && isspace (*string2))
-       string2++;
+      if (skip_spaces
+         || ((isspace (*string1) && !valid_identifier_name_char (*string2))
+             || (isspace (*string2) && !valid_identifier_name_char (*string1))))
+       {
+         skip_ws (string1, string2, end_str2);
+         skip_spaces = false;
+       }
+
       if (*string1 == '\0' || string2 == end_str2)
        break;
+
+      /* Handle the :: operator.  */
+      if (have_colon_op && string1[0] == ':' && string1[1] == ':')
+       {
+         if (*string2 != ':')
+           return 1;
+
+         string1++;
+         string2++;
+
+         if (string2 == end_str2)
+           break;
+
+         if (*string2 != ':')
+           return 1;
+
+         string1++;
+         string2++;
+
+         while (isspace (*string1))
+           string1++;
+         while (string2 < end_str2 && isspace (*string2))
+           string2++;
+         continue;
+       }
+
+      /* Handle C++ user-defined operators.  */
+      else if (language == language_cplus
+              && *string1 == 'o')
+       {
+         if (cp_is_operator (string1, string1_start))
+           {
+             /* An operator name in STRING1.  Check STRING2.  */
+             size_t cmplen
+               = std::min<size_t> (CP_OPERATOR_LEN, end_str2 - string2);
+             if (strncmp (string1, string2, cmplen) != 0)
+               return 1;
+
+             string1 += cmplen;
+             string2 += cmplen;
+
+             if (string2 != end_str2)
+               {
+                 /* Check for "operatorX" in STRING2.  */
+                 if (valid_identifier_name_char (*string2))
+                   return 1;
+
+                 skip_ws (string1, string2, end_str2);
+               }
+
+             /* Handle operator().  */
+             if (*string1 == '(')
+               {
+                 if (string2 == end_str2)
+                   {
+                     if (mode == strncmp_iw_mode::NORMAL)
+                       return 0;
+                     else
+                       {
+                         /* Don't break for the regular return at the
+                            bottom, because "operator" should not
+                            match "operator()", since this open
+                            parentheses is not the parameter list
+                            start.  */
+                         return *string1 != '\0';
+                       }
+                   }
+
+                 if (*string1 != *string2)
+                   return 1;
+
+                 string1++;
+                 string2++;
+               }
+
+             while (1)
+               {
+                 skip_ws (string1, string2, end_str2);
+
+                 /* Skip to end of token, or to END, whatever comes
+                    first.  */
+                 const char *end_str1 = string1 + strlen (string1);
+                 const char *p1 = cp_skip_operator_token (string1, end_str1);
+                 const char *p2 = cp_skip_operator_token (string2, end_str2);
+
+                 cmplen = std::min (p1 - string1, p2 - string2);
+                 if (p2 == end_str2)
+                   {
+                     if (strncmp (string1, string2, cmplen) != 0)
+                       return 1;
+                   }
+                 else
+                   {
+                     if (p1 - string1 != p2 - string2)
+                       return 1;
+                     if (strncmp (string1, string2, cmplen) != 0)
+                       return 1;
+                   }
+
+                 string1 += cmplen;
+                 string2 += cmplen;
+
+                 if (*string1 == '\0' || string2 == end_str2)
+                   break;
+                 if (*string1 == '(' || *string2 == '(')
+                   break;
+               }
+
+             continue;
+           }
+       }
+
       if (case_sensitivity == case_sensitive_on && *string1 != *string2)
        break;
       if (case_sensitivity == case_sensitive_off
@@ -2179,6 +2392,12 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
              != tolower ((unsigned char) *string2)))
        break;
 
+      /* If we see any non-whitespace, non-identifier-name character
+        (any of "()<>*&" etc.), then skip spaces the next time
+        around.  */
+      if (!isspace (*string1) && !valid_identifier_name_char (*string1))
+       skip_spaces = true;
+
       string1++;
       string2++;
     }
@@ -2200,7 +2419,7 @@ int
 strncmp_iw (const char *string1, const char *string2, size_t string2_len)
 {
   return strncmp_iw_with_mode (string1, string2, string2_len,
-                              strncmp_iw_mode::NORMAL);
+                              strncmp_iw_mode::NORMAL, language_minimal);
 }
 
 /* See utils.h.  */
@@ -2209,7 +2428,7 @@ int
 strcmp_iw (const char *string1, const char *string2)
 {
   return strncmp_iw_with_mode (string1, string2, strlen (string2),
-                              strncmp_iw_mode::MATCH_PARAMS);
+                              strncmp_iw_mode::MATCH_PARAMS, language_minimal);
 }
 
 /* This is like strcmp except that it ignores whitespace and treats
index e2fa430be58d68b5ceaee5bac0bc4b8477c7ab4b..dff4b1703cd13280f4cc7d222b07d70a5e8fe98b 100644 (file)
@@ -48,17 +48,24 @@ enum class strncmp_iw_mode
 
 /* Helper for strcmp_iw and strncmp_iw.  Exported so that languages
    can implement both NORMAL and MATCH_PARAMS variants in a single
-   function and defer part of the work to strncmp_iw_with_mode.  */
+   function and defer part of the work to strncmp_iw_with_mode.
+   LANGUAGE is used to implement some context-sensitive
+   language-specific comparisons.  For example, for C++,
+   "string1=operator()" should not match "string2=operator" even in
+   MATCH_PARAMS mode.  */
 extern int strncmp_iw_with_mode (const char *string1,
                                 const char *string2,
                                 size_t string2_len,
-                                strncmp_iw_mode mode);
+                                strncmp_iw_mode mode,
+                                enum language language);
 
 /* Do a strncmp() type operation on STRING1 and STRING2, ignoring any
    differences in whitespace.  STRING2_LEN is STRING2's length.
    Returns 0 if STRING1 matches STRING2_LEN characters of STRING2,
    non-zero otherwise (slightly different than strncmp()'s range of
-   return values).  */
+   return values).  Note: passes language_minimal to
+   strncmp_iw_with_mode, and should therefore be avoided if a more
+   suitable language is available.  */
 extern int strncmp_iw (const char *string1, const char *string2,
                       size_t string2_len);
 
@@ -70,7 +77,10 @@ extern int strncmp_iw (const char *string1, const char *string2,
    As an extra hack, string1=="FOO(ARGS)" matches string2=="FOO".
    This "feature" is useful when searching for matching C++ function
    names (such as if the user types 'break FOO', where FOO is a
-   mangled C++ function).  */
+   mangled C++ function).
+
+   Note: passes language_minimal to strncmp_iw_with_mode, and should
+   therefore be avoided if a more suitable language is available.  */
 extern int strcmp_iw (const char *string1, const char *string2);
 
 extern int strcmp_iw_ordered (const char *, const char *);