From 5ed4d49d107c46670a7994711f0284776a35284e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 4 Jan 2022 10:05:17 +0100 Subject: [PATCH] gas: rework handling of backslashes in quoted symbol names Strange effects can result from the present handling, e.g.: .if 1 "backslash\\": .endif yields first (correctly) "missing closing `"'" but then also "invalid character '\' in mnemonic" and further "end of file inside conditional". Symbol names ending in \ are in principle not expressible with that scheme. Instead of recording whether a backslash was seen, inspect the subsequent character right away. Only accept \\ (meaning a single backslash in the resulting symbol name) and \" (meaning an embedded double quote in the resulting symbol name) for now, warning about any other combination. While perhaps not necessary immediately, also permit concatenated strings to form a symbol name. This may become useful if going forward we would want to support \<octal> or \x<hex> sequences, where closing and re-opening quotes can be useful to delimit such sequences. The ELF "Multibyte symbol names" test gets switched away from using .set, as that would now also mean excluding nios2 and pru. By using .equiv instead, even the existing #notarget can be dropped. (For h8300 the .section directive additionally needs attributes specified, to avoid a target-specific warning.) 
--- gas/expr.c | 50 ++++++++++++++++++++---- gas/testsuite/gas/all/quoted-sym-names.d | 11 +++++- gas/testsuite/gas/all/quoted-sym-names.s | 17 +++++++- gas/testsuite/gas/elf/syms.d | 2 - gas/testsuite/gas/elf/syms.s | 4 +- 5 files changed, 69 insertions(+), 15 deletions(-) diff --git a/gas/expr.c b/gas/expr.c index c7f4de5ec1e..1e97a83f27b 100644 --- a/gas/expr.c +++ b/gas/expr.c @@ -2400,18 +2400,52 @@ get_symbol_name (char ** ilp_return) } else if (c == '"') { - bool backslash_seen; + char *dst = input_line_pointer; * ilp_return = input_line_pointer; - do + for (;;) { - backslash_seen = c == '\\'; - c = * input_line_pointer ++; - } - while (c != 0 && (c != '"' || backslash_seen)); + c = *input_line_pointer++; + + if (c == 0) + { + as_warn (_("missing closing '\"'")); + break; + } - if (c == 0) - as_warn (_("missing closing '\"'")); + if (c == '"') + { + char *ilp_save = input_line_pointer; + + SKIP_WHITESPACE (); + if (*input_line_pointer == '"') + { + ++input_line_pointer; + continue; + } + input_line_pointer = ilp_save; + break; + } + + if (c == '\\') + switch (*input_line_pointer) + { + case '"': + case '\\': + c = *input_line_pointer++; + break; + + default: + if (c != 0) + as_warn (_("'\\%c' in quoted symbol name; " + "behavior may change in the future"), + *input_line_pointer); + break; + } + + *dst++ = c; + } + *dst = 0; } *--input_line_pointer = 0; return c; diff --git a/gas/testsuite/gas/all/quoted-sym-names.d b/gas/testsuite/gas/all/quoted-sym-names.d index cf7a9aed724..5f0d3f8c469 100644 --- a/gas/testsuite/gas/all/quoted-sym-names.d +++ b/gas/testsuite/gas/all/quoted-sym-names.d @@ -1,6 +1,13 @@ -#nm: --extern-only +#nm: --extern-only --numeric-sort #name: quoted symbol names +# No quoted strings handling (TC_STRING_ESCAPES set to 0): +#notarget: powerpc*-*-aix* powerpc*-*-beos* powerpc-*-macos* rs6000-*-* +# Explicitly no escapes in quoted strings: +#notarget: z80-*-* #... 
0+00 T test-a - +0+01 T back\\slash +0+02 T back"slash +0+03 T backslash\\ +0+04 T backslash" diff --git a/gas/testsuite/gas/all/quoted-sym-names.s b/gas/testsuite/gas/all/quoted-sym-names.s index b4b6171f198..677f5dd3ce4 100644 --- a/gas/testsuite/gas/all/quoted-sym-names.s +++ b/gas/testsuite/gas/all/quoted-sym-names.s @@ -1,4 +1,19 @@ .text .globl "test-a" "test-a": - .word 0 + .byte 0 + .globl "back\\slash" +"back\\slash": + .byte 0 + .globl "back\"slash" +"back\"slash": + .byte 0 + .globl "backslash\\" +"backslash\\": + .byte 0 + .globl "backslash\"" +"backslash\"": + .byte 0 +/* .globl "back""slash" */ +"back""slash": + .byte 0 diff --git a/gas/testsuite/gas/elf/syms.d b/gas/testsuite/gas/elf/syms.d index 011075f9384..f1f40c10c4b 100644 --- a/gas/testsuite/gas/elf/syms.d +++ b/gas/testsuite/gas/elf/syms.d @@ -1,7 +1,5 @@ #readelf: -S -s -p .strtab #name: Multibyte symbol names -# The following targets use an unusual .set syntax... -#notarget: alpha*-*-* h8300-*-* #... Section Headers: diff --git a/gas/testsuite/gas/elf/syms.s b/gas/testsuite/gas/elf/syms.s index 977c6bb7c93..0fd960d9734 100644 --- a/gas/testsuite/gas/elf/syms.s +++ b/gas/testsuite/gas/elf/syms.s @@ -1,5 +1,5 @@ - .section "sec\xa5\xc2tion" + .section "sec\xa5\xc2tion", "a" - .set "sy\xa5\xc2mbol", . + .equiv "sy\xa5\xc2mbol", . .string8 "str\xa5\xc2ing" -- 2.30.2