#include "rust-demangle.h"
-/* Mangled Rust symbols look like this:
+/* Mangled (legacy) Rust symbols look like this:
_$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
The original symbol is:
">" => $GT$
"(" => $LP$
")" => $RP$
- " " => $u20$
- "\"" => $u22$
- "'" => $u27$
- "+" => $u2b$
- ";" => $u3b$
- "[" => $u5b$
- "]" => $u5d$
- "{" => $u7b$
- "}" => $u7d$
- "~" => $u7e$
+ "\u{XY}" => $uXY$
A double ".." means "::" and a single "." means "-".
static int is_prefixed_hash (const char *start);
static int looks_like_rust (const char *sym, size_t len);
-static int unescape (const char **in, char **out, const char *seq, char value);
+static int parse_lower_hex_nibble (char nibble);
+static char parse_legacy_escape (const char **in);
/* INPUT: sym: symbol that has been through C++ (gnu v3) demangling
const char *end;
char seen[16];
size_t i;
- int count;
+ int count, nibble;
if (strncmp (str, hash_prefix, hash_prefix_len))
return 0;
memset (seen, 0, sizeof(seen));
for (end = str + hash_len; str < end; str++)
- if (*str >= '0' && *str <= '9')
- seen[*str - '0'] = 1;
- else if (*str >= 'a' && *str <= 'f')
- seen[*str - 'a' + 10] = 1;
- else
- return 0;
+ {
+ nibble = parse_lower_hex_nibble (*str);
+ if (nibble < 0)
+ return 0;
+ seen[nibble] = 1;
+ }
/* Count how many distinct digits seen */
count = 0;
const char *end = str + len;
while (str < end)
- switch (*str)
- {
- case '$':
- if (!strncmp (str, "$C$", 3))
- str += 3;
- else if (!strncmp (str, "$SP$", 4)
- || !strncmp (str, "$BP$", 4)
- || !strncmp (str, "$RF$", 4)
- || !strncmp (str, "$LT$", 4)
- || !strncmp (str, "$GT$", 4)
- || !strncmp (str, "$LP$", 4)
- || !strncmp (str, "$RP$", 4))
- str += 4;
- else if (!strncmp (str, "$u20$", 5)
- || !strncmp (str, "$u22$", 5)
- || !strncmp (str, "$u27$", 5)
- || !strncmp (str, "$u2b$", 5)
- || !strncmp (str, "$u3b$", 5)
- || !strncmp (str, "$u5b$", 5)
- || !strncmp (str, "$u5d$", 5)
- || !strncmp (str, "$u7b$", 5)
- || !strncmp (str, "$u7d$", 5)
- || !strncmp (str, "$u7e$", 5))
- str += 5;
- else
- return 0;
- break;
- case '.':
- /* Do not allow three or more consecutive dots */
- if (!strncmp (str, "...", 3))
- return 0;
- /* Fall through */
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- case '0': case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- case '_':
- case ':':
- str++;
- break;
- default:
- return 0;
- }
+ {
+ if (*str == '$')
+ {
+ if (!parse_legacy_escape (&str))
+ return 0;
+ }
+ else if (*str == '.' || *str == '_' || *str == ':' || ISALNUM (*str))
+ str++;
+ else
+ return 0;
+ }
return 1;
}
const char *in;
char *out;
const char *end;
+ char unescaped;
if (!sym)
return;
end = sym + strlen (sym) - (hash_prefix_len + hash_len);
while (in < end)
- switch (*in)
- {
- case '$':
- if (!(unescape (&in, &out, "$C$", ',')
- || unescape (&in, &out, "$SP$", '@')
- || unescape (&in, &out, "$BP$", '*')
- || unescape (&in, &out, "$RF$", '&')
- || unescape (&in, &out, "$LT$", '<')
- || unescape (&in, &out, "$GT$", '>')
- || unescape (&in, &out, "$LP$", '(')
- || unescape (&in, &out, "$RP$", ')')
- || unescape (&in, &out, "$u20$", ' ')
- || unescape (&in, &out, "$u22$", '\"')
- || unescape (&in, &out, "$u27$", '\'')
- || unescape (&in, &out, "$u2b$", '+')
- || unescape (&in, &out, "$u3b$", ';')
- || unescape (&in, &out, "$u5b$", '[')
- || unescape (&in, &out, "$u5d$", ']')
- || unescape (&in, &out, "$u7b$", '{')
- || unescape (&in, &out, "$u7d$", '}')
- || unescape (&in, &out, "$u7e$", '~'))) {
- /* unexpected escape sequence, not looks_like_rust. */
- goto fail;
- }
- break;
- case '_':
- /* If this is the start of a path component and the next
- character is an escape sequence, ignore the underscore. The
- mangler inserts an underscore to make sure the path
- component begins with a XID_Start character. */
- if ((in == sym || in[-1] == ':') && in[1] == '$')
- in++;
- else
- *out++ = *in++;
- break;
- case '.':
- if (in[1] == '.')
- {
- /* ".." becomes "::" */
- *out++ = ':';
- *out++ = ':';
- in += 2;
- }
- else
- {
- /* "." becomes "-" */
- *out++ = '-';
- in++;
- }
- break;
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- case '0': case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- case ':':
- *out++ = *in++;
- break;
- default:
- /* unexpected character in symbol, not looks_like_rust. */
- goto fail;
- }
+ {
+ if (*in == '$')
+ {
+ unescaped = parse_legacy_escape (&in);
+ if (unescaped)
+ *out++ = unescaped;
+ else
+ /* unexpected escape sequence, not looks_like_rust. */
+ goto fail;
+ }
+ else if (*in == '_')
+ {
+ /* If this is the start of a path component and the next
+ character is an escape sequence, ignore the underscore. The
+ mangler inserts an underscore to make sure the path
+ component begins with a XID_Start character. */
+ if ((in == sym || in[-1] == ':') && in[1] == '$')
+ in++;
+ else
+ *out++ = *in++;
+ }
+ else if (*in == '.')
+ {
+ if (in[1] == '.')
+ {
+ /* ".." becomes "::" */
+ *out++ = ':';
+ *out++ = ':';
+ in += 2;
+ }
+ else
+ {
+ /* "." becomes "-" */
+ *out++ = '-';
+ in++;
+ }
+ }
+ else if (*in == ':' || ISALNUM (*in))
+ *out++ = *in++;
+ else
+ /* unexpected character in symbol, not looks_like_rust. */
+ goto fail;
+ }
goto done;
fail:
*out = '\0';
}
+/* Decode one lowercase hexadecimal digit.  Return its value (0x0-0xf)
+   if NIBBLE is one of 0-9 or a-f, and -1 otherwise.  Uppercase A-F is
+   not accepted and also yields -1.  */
static int
-unescape (const char **in, char **out, const char *seq, char value)
+parse_lower_hex_nibble (char nibble)
{
- size_t len = strlen (seq);
+ if ('0' <= nibble && nibble <= '9')
+ return nibble - '0';
+ if ('a' <= nibble && nibble <= 'f')
+ return 0xa + (nibble - 'a');
+ return -1;
+}
- if (strncmp (*in, seq, len))
- return 0;
+/* Parse one "$...$" escape sequence starting at *IN.  On success,
+   return the unescaped character and advance *IN past the closing
+   dollar sign.  Return 0, leaving *IN unchanged, if the sequence is
+   not recognized.  Recognized forms are "$C$", the two-letter codes
+   "$SP$", "$BP$", "$RF$", "$LT$", "$GT$", "$LP$" and "$RP$", and
+   "$uXY$" where X and Y are lowercase hexadecimal digits.  */
+static char
+parse_legacy_escape (const char **in)
+{
+ char c = 0;
+ const char *e;
+ size_t escape_len = 0;
+ int lo_nibble = -1, hi_nibble = -1;
- **out = value;
+ if ((*in)[0] != '$')
+ return 0;
- *in += len;
- *out += 1;
+ e = *in + 1;
+
+ if (e[0] == 'C')
+ {
+ escape_len = 1;
+
+ c = ',';
+ }
+ else
+ {
+ escape_len = 2;
+
+ if (e[0] == 'S' && e[1] == 'P')
+ c = '@';
+ else if (e[0] == 'B' && e[1] == 'P')
+ c = '*';
+ else if (e[0] == 'R' && e[1] == 'F')
+ c = '&';
+ else if (e[0] == 'L' && e[1] == 'T')
+ c = '<';
+ else if (e[0] == 'G' && e[1] == 'T')
+ c = '>';
+ else if (e[0] == 'L' && e[1] == 'P')
+ c = '(';
+ else if (e[0] == 'R' && e[1] == 'P')
+ c = ')';
+ else if (e[0] == 'u')
+ {
+ escape_len = 3;
+
+ hi_nibble = parse_lower_hex_nibble (e[1]);
+ if (hi_nibble < 0)
+ return 0;
+ lo_nibble = parse_lower_hex_nibble (e[2]);
+ if (lo_nibble < 0)
+ return 0;
+
+ /* Only allow non-control ASCII characters.  The checks below
+    reject values above 0x7f and values below 0x20.
+    NOTE(review): 0x7f (DEL) passes both tests and is therefore
+    accepted -- confirm whether that is intended.  */
+ if (hi_nibble > 7)
+ return 0;
+ c = (hi_nibble << 4) | lo_nibble;
+ if (c < 0x20)
+ return 0;
+ }
+ }
+
+ if (!c || e[escape_len] != '$')
+ return 0;
- return 1;
+ *in += 2 + escape_len;
+ return c;
}