gas: rework handling of backslashes in quoted symbol names
author Jan Beulich <jbeulich@suse.com>
Tue, 4 Jan 2022 09:05:17 +0000 (10:05 +0100)
committer Jan Beulich <jbeulich@suse.com>
Tue, 4 Jan 2022 09:05:17 +0000 (10:05 +0100)
Strange effects can result from the present handling, e.g.:

.if 1
"backslash\\":
.endif

yields first (correctly) "missing closing `"'" but then also "invalid
character '\' in mnemonic" and further "end of file inside conditional".
Symbol names ending in \ are in principle not expressible with that
scheme.

Instead of recording whether a backslash was seen, inspect the
subsequent character right away. Only accept \\ (meaning a single
backslash in the resulting symbol name) and \" (meaning an embedded
double quote in the resulting symbol name) for now, warning about any
other combination.

While perhaps not necessary immediately, also permit concatenated
strings to form a symbol name. This may become useful if we later want
to support \<octal> or \x<hex> sequences, where closing and re-opening
quotes can be useful to delimit such sequences.

The ELF "Multibyte symbol names" test gets switched away from using
.set, as that would now also mean excluding nios2 and pru. By using
.equiv instead, even the existing #notarget can be dropped. (For h8300
the .section directive additionally needs attributes specified, to avoid
a target specific warning.)

gas/expr.c
gas/testsuite/gas/all/quoted-sym-names.d
gas/testsuite/gas/all/quoted-sym-names.s
gas/testsuite/gas/elf/syms.d
gas/testsuite/gas/elf/syms.s

index c7f4de5ec1e8b7b1bdb7fb5daf274a9be9497799..1e97a83f27b6b37269abc7f8676155a747fa9236 100644 (file)
@@ -2400,18 +2400,52 @@ get_symbol_name (char ** ilp_return)
     }
   else if (c == '"')
     {
-      bool backslash_seen;
+      char *dst = input_line_pointer;
 
       * ilp_return = input_line_pointer;
-      do
+      for (;;)
        {
-         backslash_seen = c == '\\';
-         c = * input_line_pointer ++;
-       }
-      while (c != 0 && (c != '"' || backslash_seen));
+         c = *input_line_pointer++;
+
+         if (c == 0)
+           {
+             as_warn (_("missing closing '\"'"));
+             break;
+           }
 
-      if (c == 0)
-       as_warn (_("missing closing '\"'"));
+         if (c == '"')
+           {
+             char *ilp_save = input_line_pointer;
+
+             SKIP_WHITESPACE ();
+             if (*input_line_pointer == '"')
+               {
+                 ++input_line_pointer;
+                 continue;
+               }
+             input_line_pointer = ilp_save;
+             break;
+           }
+
+         if (c == '\\')
+           switch (*input_line_pointer)
+             {
+             case '"':
+             case '\\':
+               c = *input_line_pointer++;
+               break;
+
+             default:
+               if (c != 0)
+                 as_warn (_("'\\%c' in quoted symbol name; "
+                            "behavior may change in the future"),
+                          *input_line_pointer);
+               break;
+             }
+
+         *dst++ = c;
+       }
+      *dst = 0;
     }
   *--input_line_pointer = 0;
   return c;
index cf7a9aed72480cb3ad9f4761d39f7cd080119103..5f0d3f8c469357a191e162712373b819ce09ae8e 100644 (file)
@@ -1,6 +1,13 @@
-#nm: --extern-only
+#nm: --extern-only --numeric-sort
 #name: quoted symbol names
+# No quoted strings handling (TC_STRING_ESCAPES set to 0):
+#notarget: powerpc*-*-aix* powerpc*-*-beos* powerpc-*-macos* rs6000-*-*
+# Explicitly no escapes in quoted strings:
+#notarget: z80-*-*
 
 #...
 0+00 T test-a
-
+0+01 T back\\slash
+0+02 T back"slash
+0+03 T backslash\\
+0+04 T backslash"
index b4b6171f1989a08fd293361ec283822f7aef2643..677f5dd3ce45814d5eb329a118ebc5b182df5240 100644 (file)
@@ -1,4 +1,19 @@
        .text
        .globl  "test-a"
 "test-a":
-       .word 0
+       .byte 0
+       .globl  "back\\slash"
+"back\\slash":
+       .byte 0
+       .globl  "back\"slash"
+"back\"slash":
+       .byte 0
+       .globl  "backslash\\"
+"backslash\\":
+       .byte 0
+       .globl  "backslash\""
+"backslash\"":
+       .byte 0
+/*     .globl  "back""slash" */
+"back""slash":
+       .byte 0
index 011075f9384e6feaf0a6e883763b47374e6cc29f..f1f40c10c4b7da4a3d870f714083123e1768fd68 100644 (file)
@@ -1,7 +1,5 @@
 #readelf: -S -s -p .strtab
 #name: Multibyte symbol names
-# The following targets use an unusual .set syntax...
-#notarget: alpha*-*-* h8300-*-*
 
 #...
 Section Headers:
index 977c6bb7c93e8269439578753c2108a4e49d1cc3..0fd960d9734ed41171f8ed71e6ee13f7fce713c1 100644 (file)
@@ -1,5 +1,5 @@
-       .section "sec\xa5\xc2tion"
+       .section "sec\xa5\xc2tion", "a"
        
-       .set "sy\xa5\xc2mbol", .
+       .equiv "sy\xa5\xc2mbol", .
 
        .string8 "str\xa5\xc2ing"