Fix a new warning on Cygwin

[binutils-gdb.git] / gdb / rust-parse.c
diff --git a/gdb/rust-parse.c b/gdb/rust-parse.c

index 2f2afcf7e9fed5acba745ddf51002bd8dc4846a6..7d7d882872cec6e4898c1847528da840afd03eb3 100644 (file)
--- a/gdb/rust-parse.c
+++ b/gdb/rust-parse.c
@@ -1,6 +1,6 @@
  /* Rust expression parsing for GDB, the GNU debugger.
  
-   Copyright (C) 2016-2021 Free Software Foundation, Inc.
+   Copyright (C) 2016-2022 Free Software Foundation, Inc.
  
     This file is part of GDB.
  
@@ -22,8 +22,8 @@
  #include "block.h"
  #include "charset.h"
  #include "cp-support.h"
-#include "gdb_obstack.h"
-#include "gdb_regex.h"
+#include "gdbsupport/gdb_obstack.h"
+#include "gdbsupport/gdb_regex.h"
  #include "rust-lang.h"
  #include "parser-defs.h"
  #include "gdbsupport/selftest.h"
@@ -271,7 +271,10 @@ struct rust_parser
    operation_up parse_entry_point ()
    {
      lex ();
-    return parse_expr ();
+    operation_up result = parse_expr ();
+    if (current_token != 0)
+      error (_("Syntax error near '%s'"), pstate->prev_lexptr);
+    return result;
    }
  
    operation_up parse_tuple ();
@@ -452,7 +455,7 @@ rust_parser::rust_lookup_type (const char *name)
    if (result.symbol != NULL)
      {
        update_innermost_block (result);
-      return SYMBOL_TYPE (result.symbol);
+      return result.symbol->type ();
      }
  
    type = lookup_typename (language (), name, NULL, 1);
@@ -577,6 +580,36 @@ rust_parser::lex_escape (int is_byte)
    return result;
  }
  
+/* A helper for lex_character.  Search forward for the closing single
+   quote, then convert the bytes from the host charset to UTF-32.  */
+
+static uint32_t
+lex_multibyte_char (const char *text, int *len)
+{
+  /* Only look a maximum of 5 bytes for the closing quote.  This is
+     the maximum for UTF-8.  */
+  int quote;
+  gdb_assert (text[0] != '\'');
+  for (quote = 1; text[quote] != '\0' && text[quote] != '\''; ++quote)
+    ;
+  *len = quote;
+  /* The caller will issue an error.  */
+  if (text[quote] == '\0')
+    return 0;
+
+  auto_obstack result;
+  convert_between_encodings (host_charset (), HOST_UTF32,
+                            (const gdb_byte *) text,
+                            quote, 1, &result, translit_none);
+
+  int size = obstack_object_size (&result);
+  if (size > 4)
+    error (_("overlong character literal"));
+  uint32_t value;
+  memcpy (&value, obstack_finish (&result), size);
+  return value;
+}
+
  /* Lex a character constant.  */
  
  int
@@ -592,13 +625,15 @@ rust_parser::lex_character ()
      }
    gdb_assert (pstate->lexptr[0] == '\'');
    ++pstate->lexptr;
-  /* This should handle UTF-8 here.  */
-  if (pstate->lexptr[0] == '\\')
+  if (pstate->lexptr[0] == '\'')
+    error (_("empty character literal"));
+  else if (pstate->lexptr[0] == '\\')
      value = lex_escape (is_byte);
    else
      {
-      value = pstate->lexptr[0] & 0xff;
-      ++pstate->lexptr;
+      int len;
+      value = lex_multibyte_char (&pstate->lexptr[0], &len);
+      pstate->lexptr += len;
      }
  
    if (pstate->lexptr[0] != '\'')
@@ -695,7 +730,8 @@ rust_parser::lex_string ()
           if (is_byte)
             obstack_1grow (&obstack, value);
           else
-           convert_between_encodings ("UTF-32", "UTF-8", (gdb_byte *) &value,
+           convert_between_encodings (HOST_UTF32, "UTF-8",
+                                      (gdb_byte *) &value,
                                        sizeof (value), sizeof (value),
                                        &obstack, translit_none);
         }
@@ -739,7 +775,10 @@ rust_identifier_start_p (char c)
    return ((c >= 'a' && c <= 'z')
           || (c >= 'A' && c <= 'Z')
           || c == '_'
-         || c == '$');
+         || c == '$'
+         /* Allow any non-ASCII character as an identifier.  There
+            doesn't seem to be a need to be picky about this.  */
+         || (c & 0x80) != 0);
  }
  
  /* Lex an identifier.  */
@@ -747,35 +786,47 @@ rust_identifier_start_p (char c)
  int
  rust_parser::lex_identifier ()
  {
-  const char *start = pstate->lexptr;
    unsigned int length;
    const struct token_info *token;
-  int i;
    int is_gdb_var = pstate->lexptr[0] == '$';
  
+  bool is_raw = false;
+  if (pstate->lexptr[0] == 'r'
+      && pstate->lexptr[1] == '#'
+      && rust_identifier_start_p (pstate->lexptr[2]))
+    {
+      is_raw = true;
+      pstate->lexptr += 2;
+    }
+
+  const char *start = pstate->lexptr;
    gdb_assert (rust_identifier_start_p (pstate->lexptr[0]));
  
    ++pstate->lexptr;
  
-  /* For the time being this doesn't handle Unicode rules.  Non-ASCII
-     identifiers are gated anyway.  */
+  /* Allow any non-ASCII character here.  This "handles" UTF-8 by
+     passing it through.  */
    while ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'z')
          || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'Z')
          || pstate->lexptr[0] == '_'
          || (is_gdb_var && pstate->lexptr[0] == '$')
-        || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9'))
+        || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
+        || (pstate->lexptr[0] & 0x80) != 0)
      ++pstate->lexptr;
  
  
    length = pstate->lexptr - start;
    token = NULL;
-  for (i = 0; i < ARRAY_SIZE (identifier_tokens); ++i)
+  if (!is_raw)
      {
-      if (length == strlen (identifier_tokens[i].name)
-         && strncmp (identifier_tokens[i].name, start, length) == 0)
+      for (const auto &candidate : identifier_tokens)
         {
-         token = &identifier_tokens[i];
-         break;
+         if (length == strlen (candidate.name)
+             && strncmp (candidate.name, start, length) == 0)
+           {
+             token = &candidate;
+             break;
+           }
         }
      }
  
@@ -789,6 +840,7 @@ rust_parser::lex_identifier ()
         }
      }
    else if (token == NULL
+          && !is_raw
            && (strncmp (start, "thread", length) == 0
                || strncmp (start, "task", length) == 0)
            && space_then_number (pstate->lexptr))
@@ -825,15 +877,14 @@ int
  rust_parser::lex_operator ()
  {
    const struct token_info *token = NULL;
-  int i;
  
-  for (i = 0; i < ARRAY_SIZE (operator_tokens); ++i)
+  for (const auto &candidate : operator_tokens)
      {
-      if (strncmp (operator_tokens[i].name, pstate->lexptr,
-                  strlen (operator_tokens[i].name)) == 0)
+      if (strncmp (candidate.name, pstate->lexptr,
+                  strlen (candidate.name)) == 0)
         {
-         pstate->lexptr += strlen (operator_tokens[i].name);
-         token = &operator_tokens[i];
+         pstate->lexptr += strlen (candidate.name);
+         token = &candidate;
           break;
         }
      }
@@ -1087,7 +1138,7 @@ rust_parser::parse_tuple ()
      {
        /* Parenthesized expression.  */
        lex ();
-      return expr;
+      return make_operation<rust_parenthesized_operation> (std::move (expr));
      }
  
    std::vector<operation_up> ops;
@@ -1158,15 +1209,15 @@ rust_parser::name_to_operation (const std::string &name)
    struct block_symbol sym = lookup_symbol (name.c_str (),
                                            pstate->expression_context_block,
                                            VAR_DOMAIN);
-  if (sym.symbol != nullptr && SYMBOL_CLASS (sym.symbol) != LOC_TYPEDEF)
+  if (sym.symbol != nullptr && sym.symbol->aclass () != LOC_TYPEDEF)
      return make_operation<var_value_operation> (sym);
  
    struct type *type = nullptr;
  
    if (sym.symbol != nullptr)
      {
-      gdb_assert (SYMBOL_CLASS (sym.symbol) == LOC_TYPEDEF);
-      type = SYMBOL_TYPE (sym.symbol);
+      gdb_assert (sym.symbol->aclass () == LOC_TYPEDEF);
+      type = sym.symbol->type ();
      }
    if (type == nullptr)
      type = rust_lookup_type (name.c_str ());
@@ -1972,6 +2023,7 @@ rust_parser::parse_atom (bool required)
  
      case STRING:
        result = parse_string ();
+      lex ();
        break;
  
      case BYTESTRING:
@@ -2226,8 +2278,6 @@ rust_lex_test_push_back (rust_parser *parser)
  static void
  rust_lex_tests (void)
  {
-  int i;
-
    /* Set up dummy "parser", so that rust_type works.  */
    struct parser_state ps (language_def (language_rust), target_gdbarch (),
                           nullptr, 0, 0, nullptr, 0, nullptr, false);
@@ -2300,6 +2350,13 @@ rust_lex_tests (void)
    rust_lex_stringish_test (&parser, "hibob", "hibob", IDENT);
    rust_lex_stringish_test (&parser, "hibob__93", "hibob__93", IDENT);
    rust_lex_stringish_test (&parser, "thread", "thread", IDENT);
+  rust_lex_stringish_test (&parser, "r#true", "true", IDENT);
+
+  const int expected1[] = { IDENT, DECIMAL_INTEGER, 0 };
+  rust_lex_test_sequence (&parser, "r#thread 23", ARRAY_SIZE (expected1),
+                         expected1);
+  const int expected2[] = { IDENT, '#', 0 };
+  rust_lex_test_sequence (&parser, "r#", ARRAY_SIZE (expected2), expected2);
  
    rust_lex_stringish_test (&parser, "\"string\"", "string", STRING);
    rust_lex_stringish_test (&parser, "\"str\\ting\"", "str\ting", STRING);
@@ -2315,13 +2372,11 @@ rust_lex_tests (void)
    rust_lex_stringish_test (&parser, "br####\"\\x73tring\"####", "\\x73tring",
                            BYTESTRING);
  
-  for (i = 0; i < ARRAY_SIZE (identifier_tokens); ++i)
-    rust_lex_test_one (&parser, identifier_tokens[i].name,
-                      identifier_tokens[i].value);
+  for (const auto &candidate : identifier_tokens)
+    rust_lex_test_one (&parser, candidate.name, candidate.value);
  
-  for (i = 0; i < ARRAY_SIZE (operator_tokens); ++i)
-    rust_lex_test_one (&parser, operator_tokens[i].name,
-                      operator_tokens[i].value);
+  for (const auto &candidate : operator_tokens)
+    rust_lex_test_one (&parser, candidate.name, candidate.value);
  
    rust_lex_test_completion (&parser);
    rust_lex_test_push_back (&parser);