From 4795cd4a26bff8dd3ecf805b81077bdb3bd516c1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 20 Mar 2023 16:59:06 +0100 Subject: [PATCH] x86: drop identifier_chars[] It tries to resemble what's underlying is_part_of_name(), but doesn't quite achieve that: '$' for example is unconditionally marked as part of symbol names, but was included as identifier char for Intel syntax only. Note that i386_att_operand() checks for the immediate prefix first, so the wider coverage by starts_memory_operand() has no real effect there, but it does matter for something like mov %fs:$dollar, %eax which previously wasn't accepted (but which clearly is a memory reference - there's no point in forcing people to parenthesize the symbol name). Similarly including '%' as an identifier character for Intel syntax had no real significance to the rest of the assembler. If '%' were to be valid in (unquoted) symbol names, LEX_PCT would need to be defined. Note further that this also addresses the latent issue of a sub-target defining LEX_AT or LEX_QM to zero: That would make '@' and/or '?' not a valid part of symbol names, but would have included them in what is_identifier_char() considers a valid part of a name. (There's a minor related issue which is actually being eliminated: te-interix.h allows '@' only in the middle of symbol names, yet starts_memory_operand() specifically looks at the first character of [possibly] a symbol name.) In parse_real_register() there's no point also checking is_name_ender() as at this point no character is marked solely LEX_END_NAME by any sub-target. Checking is_name_beginner() is also pointless as the hash lookup will fail anyway for a zero-length name. While touching the check in parse_real_register() also drop the "allow_naked_reg" part of the condition: This has only led to inconsistent error messages. 
--- gas/config/tc-i386.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index fb755943aea..ed8329f25d8 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -531,14 +531,12 @@ const char FLT_CHARS[] = "fFdDxXhHbB"; static char mnemonic_chars[256]; static char register_chars[256]; static char operand_chars[256]; -static char identifier_chars[256]; /* Lexical macros. */ #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x]) #define is_operand_char(x) (operand_chars[(unsigned char) x]) #define is_register_char(x) (register_chars[(unsigned char) x]) #define is_space_char(x) ((x) == ' ') -#define is_identifier_char(x) (identifier_chars[(unsigned char) x]) /* All non-digit non-letter characters that may occur in an operand. */ static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]"; @@ -2611,8 +2609,6 @@ set_intel_syntax (int syntax_flag) expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0); - identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0; - identifier_chars['$'] = intel_syntax ? '$' : 0; register_prefix = allow_naked_reg ? 
"" : "%"; } @@ -3076,27 +3072,16 @@ md_begin (void) operand_chars[c] = c; #endif - if (ISALPHA (c) || ISDIGIT (c)) - identifier_chars[c] = c; - else if (c >= 128) - { - identifier_chars[c] = c; - operand_chars[c] = c; - } + if (c >= 128) + operand_chars[c] = c; } -#ifdef LEX_AT - identifier_chars['@'] = '@'; -#endif #ifdef LEX_QM - identifier_chars['?'] = '?'; operand_chars['?'] = '?'; #endif mnemonic_chars['_'] = '_'; mnemonic_chars['-'] = '-'; mnemonic_chars['.'] = '.'; - identifier_chars['_'] = '_'; - identifier_chars['.'] = '.'; for (p = operand_special_chars; *p != '\0'; p++) operand_chars[(unsigned char) *p] = *p; @@ -11404,7 +11389,7 @@ RC_SAE_immediate (const char *imm_start) static INLINE bool starts_memory_operand (char c) { return ISDIGIT (c) - || is_identifier_char (c) + || is_name_beginner (c) || strchr ("([\"+-!~", c); } @@ -12896,10 +12881,7 @@ parse_real_register (char *reg_string, char **end_op) s++; } - /* For naked regs, make sure that we are not dealing with an identifier. - This prevents confusing an identifier like `eax_var' with register - `eax'. */ - if (allow_naked_reg && identifier_chars[(unsigned char) *s]) + if (is_part_of_name (*s)) return (const reg_entry *) NULL; *end_op = s; -- 2.30.2