/* Rust expression parsing for GDB, the GNU debugger.
- Copyright (C) 2016-2021 Free Software Foundation, Inc.
+ Copyright (C) 2016-2022 Free Software Foundation, Inc.
This file is part of GDB.
#include "block.h"
#include "charset.h"
#include "cp-support.h"
-#include "gdb_obstack.h"
-#include "gdb_regex.h"
+#include "gdbsupport/gdb_obstack.h"
+#include "gdbsupport/gdb_regex.h"
#include "rust-lang.h"
#include "parser-defs.h"
#include "gdbsupport/selftest.h"
operation_up parse_entry_point () /* Lex the first token, parse one expression, and reject trailing input.  */
{
lex ();
- return parse_expr ();
+ operation_up result = parse_expr ();
+ if (current_token != 0) /* NOTE(review): 0 appears to be the end-of-input token (the lexer self-tests end their expected sequences with 0) -- confirm.  */
+ error (_("Syntax error near '%s'"), pstate->prev_lexptr); /* Something was left over after a complete expression.  */
+ return result;
}
operation_up parse_tuple ();
if (result.symbol != NULL)
{
update_innermost_block (result);
- return SYMBOL_TYPE (result.symbol);
+ return result.symbol->type ();
}
type = lookup_typename (language (), name, NULL, 1);
return result;
}
+/* A helper for lex_character.  TEXT points at the first byte after
+   the opening single quote of a character literal.  Search forward
+   for the closing single quote, then convert the bytes in between
+   from the host charset to UTF-32.
+
+   *LEN is always set to the number of bytes scanned, that is, the
+   offset of the closing quote or, when there is none, of the
+   terminating NUL.  Returns the converted character, or 0 when the
+   literal is unterminated.  Calls error when the converted text
+   occupies more than 4 bytes.  */
+
+static uint32_t
+lex_multibyte_char (const char *text, int *len)
+{
+  /* The scan itself is not length-limited; a literal that holds more
+     than one character is rejected below, once the size of the
+     converted text is known.  */
+  int quote;
+  gdb_assert (text[0] != '\'');
+  for (quote = 1; text[quote] != '\0' && text[quote] != '\''; ++quote)
+    ;
+  *len = quote;
+  /* The caller will issue an error.  */
+  if (text[quote] == '\0')
+    return 0;
+
+  auto_obstack result;
+  convert_between_encodings (host_charset (), HOST_UTF32,
+			     (const gdb_byte *) text,
+			     quote, 1, &result, translit_none);
+
+  int size = obstack_object_size (&result);
+  if (size > 4)
+    error (_("overlong character literal"));
+  /* Start from zero so that VALUE is fully defined even if the
+     conversion produced fewer than sizeof (value) bytes.  */
+  uint32_t value = 0;
+  memcpy (&value, obstack_finish (&result), size);
+  return value;
+}
+
/* Lex a character constant. */
int
}
gdb_assert (pstate->lexptr[0] == '\'');
++pstate->lexptr;
- /* This should handle UTF-8 here. */
- if (pstate->lexptr[0] == '\\')
+ if (pstate->lexptr[0] == '\'')
+ error (_("empty character literal"));
+ else if (pstate->lexptr[0] == '\\')
value = lex_escape (is_byte);
else
{
- value = pstate->lexptr[0] & 0xff;
- ++pstate->lexptr;
+ int len;
+ value = lex_multibyte_char (&pstate->lexptr[0], &len);
+ pstate->lexptr += len;
}
if (pstate->lexptr[0] != '\'')
if (is_byte)
obstack_1grow (&obstack, value);
else
- convert_between_encodings ("UTF-32", "UTF-8", (gdb_byte *) &value,
+ convert_between_encodings (HOST_UTF32, "UTF-8",
+ (gdb_byte *) &value,
sizeof (value), sizeof (value),
&obstack, translit_none);
}
return ((c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| c == '_'
- || c == '$');
+ || c == '$'
+ /* Allow any non-ASCII character as an identifier. There
+ doesn't seem to be a need to be picky about this. */
+ || (c & 0x80) != 0);
}
/* Lex an identifier. */
int
rust_parser::lex_identifier ()
{
- const char *start = pstate->lexptr;
unsigned int length;
const struct token_info *token;
- int i;
int is_gdb_var = pstate->lexptr[0] == '$';
+ bool is_raw = false;
+ if (pstate->lexptr[0] == 'r'
+ && pstate->lexptr[1] == '#'
+ && rust_identifier_start_p (pstate->lexptr[2]))
+ {
+ is_raw = true;
+ pstate->lexptr += 2;
+ }
+
+ const char *start = pstate->lexptr;
gdb_assert (rust_identifier_start_p (pstate->lexptr[0]));
++pstate->lexptr;
- /* For the time being this doesn't handle Unicode rules. Non-ASCII
- identifiers are gated anyway. */
+ /* Allow any non-ASCII character here. This "handles" UTF-8 by
+ passing it through. */
while ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'z')
|| (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'Z')
|| pstate->lexptr[0] == '_'
|| (is_gdb_var && pstate->lexptr[0] == '$')
- || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9'))
+ || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
+ || (pstate->lexptr[0] & 0x80) != 0)
++pstate->lexptr;
length = pstate->lexptr - start;
token = NULL;
- for (i = 0; i < ARRAY_SIZE (identifier_tokens); ++i)
+ if (!is_raw)
{
- if (length == strlen (identifier_tokens[i].name)
- && strncmp (identifier_tokens[i].name, start, length) == 0)
+ for (const auto &candidate : identifier_tokens)
{
- token = &identifier_tokens[i];
- break;
+ if (length == strlen (candidate.name)
+ && strncmp (candidate.name, start, length) == 0)
+ {
+ token = &candidate;
+ break;
+ }
}
}
}
}
else if (token == NULL
+ && !is_raw
&& (strncmp (start, "thread", length) == 0
|| strncmp (start, "task", length) == 0)
&& space_then_number (pstate->lexptr))
rust_parser::lex_operator ()
{
const struct token_info *token = NULL;
- int i;
- for (i = 0; i < ARRAY_SIZE (operator_tokens); ++i)
+ for (const auto &candidate : operator_tokens)
{
- if (strncmp (operator_tokens[i].name, pstate->lexptr,
- strlen (operator_tokens[i].name)) == 0)
+ if (strncmp (candidate.name, pstate->lexptr,
+ strlen (candidate.name)) == 0)
{
- pstate->lexptr += strlen (operator_tokens[i].name);
- token = &operator_tokens[i];
+ pstate->lexptr += strlen (candidate.name);
+ token = &candidate;
break;
}
}
{
/* Parenthesized expression. */
lex ();
- return expr;
+ return make_operation<rust_parenthesized_operation> (std::move (expr));
}
std::vector<operation_up> ops;
struct block_symbol sym = lookup_symbol (name.c_str (),
pstate->expression_context_block,
VAR_DOMAIN);
- if (sym.symbol != nullptr && SYMBOL_CLASS (sym.symbol) != LOC_TYPEDEF)
+ if (sym.symbol != nullptr && sym.symbol->aclass () != LOC_TYPEDEF)
return make_operation<var_value_operation> (sym);
struct type *type = nullptr;
if (sym.symbol != nullptr)
{
- gdb_assert (SYMBOL_CLASS (sym.symbol) == LOC_TYPEDEF);
- type = SYMBOL_TYPE (sym.symbol);
+ gdb_assert (sym.symbol->aclass () == LOC_TYPEDEF);
+ type = sym.symbol->type ();
}
if (type == nullptr)
type = rust_lookup_type (name.c_str ());
case STRING:
result = parse_string ();
+ lex ();
break;
case BYTESTRING:
static void
rust_lex_tests (void)
{
- int i;
-
/* Set up dummy "parser", so that rust_type works. */
struct parser_state ps (language_def (language_rust), target_gdbarch (),
nullptr, 0, 0, nullptr, 0, nullptr, false);
rust_lex_stringish_test (&parser, "hibob", "hibob", IDENT);
rust_lex_stringish_test (&parser, "hibob__93", "hibob__93", IDENT);
rust_lex_stringish_test (&parser, "thread", "thread", IDENT);
+ rust_lex_stringish_test (&parser, "r#true", "true", IDENT);
+
+ const int expected1[] = { IDENT, DECIMAL_INTEGER, 0 };
+ rust_lex_test_sequence (&parser, "r#thread 23", ARRAY_SIZE (expected1),
+ expected1);
+ const int expected2[] = { IDENT, '#', 0 };
+ rust_lex_test_sequence (&parser, "r#", ARRAY_SIZE (expected2), expected2);
rust_lex_stringish_test (&parser, "\"string\"", "string", STRING);
rust_lex_stringish_test (&parser, "\"str\\ting\"", "str\ting", STRING);
rust_lex_stringish_test (&parser, "br####\"\\x73tring\"####", "\\x73tring",
BYTESTRING);
- for (i = 0; i < ARRAY_SIZE (identifier_tokens); ++i)
- rust_lex_test_one (&parser, identifier_tokens[i].name,
- identifier_tokens[i].value);
+ for (const auto &candidate : identifier_tokens)
+ rust_lex_test_one (&parser, candidate.name, candidate.value);
- for (i = 0; i < ARRAY_SIZE (operator_tokens); ++i)
- rust_lex_test_one (&parser, operator_tokens[i].name,
- operator_tokens[i].value);
+ for (const auto &candidate : operator_tokens)
+ rust_lex_test_one (&parser, candidate.name, candidate.value);
rust_lex_test_completion (&parser);
rust_lex_test_push_back (&parser);