From 84096498a7bd788599d4a7ca63543fc7c297645e Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Fri, 13 Nov 2020 18:55:45 +0000 Subject: [PATCH] libiberty: Support the new ("v0") mangling scheme in rust-demangle This is the libiberty (mainly for binutils/gdb) counterpart of https://github.com/alexcrichton/rustc-demangle/pull/23. Relevant links for the new Rust mangling scheme (aka "v0"): * Rust RFC: https://github.com/rust-lang/rfcs/pull/2603 * tracking issue: https://github.com/rust-lang/rust/issues/60705 * implementation: https://github.com/rust-lang/rust/pull/57967 This implementation includes full support for UTF-8 identifiers via punycode, so I've included a testcase for that as well. libiberty/ChangeLog: * rust-demangle.c (struct rust_demangler): Add skipping_printing and bound_lifetime_depth fields. (eat): Add (v0-only). (parse_integer_62): Add (v0-only). (parse_opt_integer_62): Add (v0-only). (parse_disambiguator): Add (v0-only). (struct rust_mangled_ident): Add punycode{,_len} fields. (parse_ident): Support v0 identifiers. (print_str): Respect skipping_printing. (print_uint64): Add (v0-only). (print_uint64_hex): Add (v0-only). (print_ident): Respect skipping_printing, Support v0 identifiers. (print_lifetime_from_index): Add (v0-only). (demangle_binder): Add (v0-only). (demangle_path): Add (v0-only). (demangle_generic_arg): Add (v0-only). (demangle_type): Add (v0-only). (demangle_path_maybe_open_generics): Add (v0-only). (demangle_dyn_trait): Add (v0-only). (demangle_const): Add (v0-only). (demangle_const_uint): Add (v0-only). (basic_type): Add (v0-only). (rust_demangle_callback): Support v0 symbols. * testsuite/rust-demangle-expected: Add v0 testcases. --- libiberty/rust-demangle.c | 1010 +++++++++++++++++++- libiberty/testsuite/rust-demangle-expected | 134 ++- 2 files changed, 1133 insertions(+), 11 deletions(-) diff --git a/libiberty/rust-demangle.c b/libiberty/rust-demangle.c index b87365c85fe..08c615f6d8b 100644 --- a/libiberty/rust-demangle.c +++ b/libiberty/rust-demangle.c @@ -1,6 +1,7 @@ /* Demangler for the Rust programming language Copyright (C) 2016-2020 Free Software Foundation, Inc. Written by David Tolnay (dtolnay@gmail.com). + Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support. This file is part of the libiberty library. Libiberty is free software; you can redistribute it and/or @@ -64,11 +65,16 @@ struct rust_demangler /* Non-zero if any error occurred. */ int errored; + /* Non-zero if nothing should be printed. */ + int skipping_printing; + /* Non-zero if printing should be verbose (e.g. include hashes). */ int verbose; /* Rust mangling version, with legacy mangling being -1. */ int version; + + uint64_t bound_lifetime_depth; }; /* Parsing functions. */ @@ -81,6 +87,18 @@ peek (const struct rust_demangler *rdm) return 0; } +static int +eat (struct rust_demangler *rdm, char c) +{ + if (peek (rdm) == c) + { + rdm->next++; + return 1; + } + else + return 0; +} + static char next (struct rust_demangler *rdm) { @@ -92,11 +110,87 @@ next (struct rust_demangler *rdm) return c; } +static uint64_t +parse_integer_62 (struct rust_demangler *rdm) +{ + char c; + uint64_t x; + + if (eat (rdm, '_')) + return 0; + + x = 0; + while (!eat (rdm, '_')) + { + c = next (rdm); + x *= 62; + if (ISDIGIT (c)) + x += c - '0'; + else if (ISLOWER (c)) + x += 10 + (c - 'a'); + else if (ISUPPER (c)) + x += 10 + 26 + (c - 'A'); + else + { + rdm->errored = 1; + return 0; + } + } + return x + 1; +} + +static uint64_t +parse_opt_integer_62 (struct rust_demangler *rdm, char tag) +{ + if (!eat (rdm, tag)) + return 0; + return 1 + parse_integer_62 (rdm); +} + +static uint64_t +parse_disambiguator (struct rust_demangler *rdm) +{ + return parse_opt_integer_62 (rdm, 's'); +} + +static size_t +parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value) +{ + char c; + size_t hex_len; + + hex_len = 0; + *value = 0; + + while (!eat (rdm, '_')) + { + *value <<= 4; + + c = next (rdm); + if (ISDIGIT (c)) + *value |= c - '0'; + else if (c >= 'a' && c <= 'f') + *value |= 10 + (c - 'a'); + else + { + rdm->errored = 1; + return 0; + } + hex_len++; + } + + return hex_len; +} + struct rust_mangled_ident { /* ASCII part of the identifier. */ const char *ascii; size_t ascii_len; + + /* Punycode insertion codes for Unicode codepoints, if any. */ + const char *punycode; + size_t punycode_len; }; static struct rust_mangled_ident @@ -104,10 +198,16 @@ parse_ident (struct rust_demangler *rdm) { char c; size_t start, len; + int is_punycode = 0; struct rust_mangled_ident ident; ident.ascii = NULL; ident.ascii_len = 0; + ident.punycode = NULL; + ident.punycode_len = 0; + + if (rdm->version != -1) + is_punycode = eat (rdm, 'u'); c = next (rdm); if (!ISDIGIT (c)) @@ -121,6 +221,10 @@ parse_ident (struct rust_demangler *rdm) while (ISDIGIT (peek (rdm))) len = len * 10 + (next (rdm) - '0'); + /* Skip past the optional `_` separator (v0). */ + if (rdm->version != -1) + eat (rdm, '_'); + start = rdm->next; rdm->next += len; /* Check for overflows. */ @@ -133,6 +237,27 @@ parse_ident (struct rust_demangler *rdm) ident.ascii = rdm->sym + start; ident.ascii_len = len; + if (is_punycode) + { + ident.punycode_len = 0; + while (ident.ascii_len > 0) + { + ident.ascii_len--; + + /* The last '_' is a separator between ascii & punycode. */ + if (ident.ascii[ident.ascii_len] == '_') + break; + + ident.punycode_len++; + } + if (!ident.punycode_len) + { + rdm->errored = 1; + return ident; + } + ident.punycode = ident.ascii + (len - ident.punycode_len); + } + if (ident.ascii_len == 0) ident.ascii = NULL; @@ -144,12 +269,28 @@ parse_ident (struct rust_demangler *rdm) static void print_str (struct rust_demangler *rdm, const char *data, size_t len) { - if (!rdm->errored) + if (!rdm->errored && !rdm->skipping_printing) rdm->callback (data, len, rdm->callback_opaque); } #define PRINT(s) print_str (rdm, s, strlen (s)) +static void +print_uint64 (struct rust_demangler *rdm, uint64_t x) +{ + char s[21]; + snprintf (s, 21, "%" PRIu64, x); + PRINT (s); +} + +static void +print_uint64_hex (struct rust_demangler *rdm, uint64_t x) +{ + char s[17]; + snprintf (s, 17, "%" PRIx64, x); + PRINT (s); +} + /* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */ static int decode_lower_hex_nibble (char nibble) @@ -230,9 +371,14 @@ static void print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident) { char unescaped; - size_t len; - - if (rdm->errored) + uint8_t *out, *p, d; + size_t len, cap, punycode_pos, j; + /* Punycode parameters and state. */ + uint32_t c; + size_t base, t_min, t_max, skew, damp, bias, i; + size_t delta, w, k, t; + + if (rdm->errored || rdm->skipping_printing) return; if (rdm->version == -1) @@ -273,8 +419,7 @@ print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident) } else { - /* "." becomes "-" */ - PRINT ("-"); + PRINT ("."); len = 1; } } @@ -294,6 +439,830 @@ print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident) return; } + + if (!ident.punycode) + { + print_str (rdm, ident.ascii, ident.ascii_len); + return; + } + + len = 0; + cap = 4; + while (cap < ident.ascii_len) + { + cap *= 2; + /* Check for overflows. */ + if ((cap * 4) / 4 != cap) + { + rdm->errored = 1; + return; + } + } + + /* Store the output codepoints as groups of 4 UTF-8 bytes. */ + out = (uint8_t *)malloc (cap * 4); + if (!out) + { + rdm->errored = 1; + return; + } + + /* Populate initial output from ASCII fragment. */ + for (len = 0; len < ident.ascii_len; len++) + { + p = out + 4 * len; + p[0] = 0; + p[1] = 0; + p[2] = 0; + p[3] = ident.ascii[len]; + } + + /* Punycode parameters and initial state. */ + base = 36; + t_min = 1; + t_max = 26; + skew = 38; + damp = 700; + bias = 72; + i = 0; + c = 0x80; + + punycode_pos = 0; + while (punycode_pos < ident.punycode_len) + { + /* Read one delta value. */ + delta = 0; + w = 1; + k = 0; + do + { + k += base; + t = k < bias ? 0 : (k - bias); + if (t < t_min) + t = t_min; + if (t > t_max) + t = t_max; + + if (punycode_pos >= ident.punycode_len) + goto cleanup; + d = ident.punycode[punycode_pos++]; + + if (ISLOWER (d)) + d = d - 'a'; + else if (ISDIGIT (d)) + d = 26 + (d - '0'); + else + { + rdm->errored = 1; + goto cleanup; + } + + delta += d * w; + w *= base - t; + } + while (d >= t); + + /* Compute the new insert position and character. */ + len++; + i += delta; + c += i / len; + i %= len; + + /* Ensure enough space is available. */ + if (cap < len) + { + cap *= 2; + /* Check for overflows. */ + if ((cap * 4) / 4 != cap || cap < len) + { + rdm->errored = 1; + goto cleanup; + } + } + p = (uint8_t *)realloc (out, cap * 4); + if (!p) + { + rdm->errored = 1; + goto cleanup; + } + out = p; + + /* Move the characters after the insert position. */ + p = out + i * 4; + memmove (p + 4, p, (len - i - 1) * 4); + + /* Insert the new character, as UTF-8 bytes. */ + p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0; + p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0; + p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f); + p[3] = 0x80 | (c & 0x3f); + + /* If there are no more deltas, decoding is complete. */ + if (punycode_pos == ident.punycode_len) + break; + + i++; + + /* Perform bias adaptation. */ + delta /= damp; + damp = 2; + + delta += delta / len; + k = 0; + while (delta > ((base - t_min) * t_max) / 2) + { + delta /= base - t_min; + k += base; + } + bias = k + ((base - t_min + 1) * delta) / (delta + skew); + } + + /* Remove all the 0 bytes to leave behind an UTF-8 string. */ + for (i = 0, j = 0; i < len * 4; i++) + if (out[i] != 0) + out[j++] = out[i]; + + print_str (rdm, (const char *)out, j); + +cleanup: + free (out); +} + +/* Print the lifetime according to the previously decoded index. + An index of `0` always refers to `'_`, but starting with `1`, + indices refer to late-bound lifetimes introduced by a binder. */ +static void +print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt) +{ + char c; + uint64_t depth; + + PRINT ("'"); + if (lt == 0) + { + PRINT ("_"); + return; + } + + depth = rdm->bound_lifetime_depth - lt; + /* Try to print lifetimes alphabetically first. */ + if (depth < 26) + { + c = 'a' + depth; + print_str (rdm, &c, 1); + } + else + { + /* Use `'_123` after running out of letters. */ + PRINT ("_"); + print_uint64 (rdm, depth); + } +} + +/* Demangling functions. */ + +static void demangle_binder (struct rust_demangler *rdm); +static void demangle_path (struct rust_demangler *rdm, int in_value); +static void demangle_generic_arg (struct rust_demangler *rdm); +static void demangle_type (struct rust_demangler *rdm); +static int demangle_path_maybe_open_generics (struct rust_demangler *rdm); +static void demangle_dyn_trait (struct rust_demangler *rdm); +static void demangle_const (struct rust_demangler *rdm); +static void demangle_const_uint (struct rust_demangler *rdm); +static void demangle_const_int (struct rust_demangler *rdm); +static void demangle_const_bool (struct rust_demangler *rdm); +static void demangle_const_char (struct rust_demangler *rdm); + +/* Optionally enter a binder ('G') for late-bound lifetimes, + printing e.g. `for<'a, 'b> `, and make those lifetimes visible + to the caller (via depth level, which the caller should reset). */ +static void +demangle_binder (struct rust_demangler *rdm) +{ + uint64_t i, bound_lifetimes; + + if (rdm->errored) + return; + + bound_lifetimes = parse_opt_integer_62 (rdm, 'G'); + if (bound_lifetimes > 0) + { + PRINT ("for<"); + for (i = 0; i < bound_lifetimes; i++) + { + if (i > 0) + PRINT (", "); + rdm->bound_lifetime_depth++; + print_lifetime_from_index (rdm, 1); + } + PRINT ("> "); + } +} + +static void +demangle_path (struct rust_demangler *rdm, int in_value) +{ + char tag, ns; + int was_skipping_printing; + size_t i, backref, old_next; + uint64_t dis; + struct rust_mangled_ident name; + + if (rdm->errored) + return; + + switch (tag = next (rdm)) + { + case 'C': + dis = parse_disambiguator (rdm); + name = parse_ident (rdm); + + print_ident (rdm, name); + if (rdm->verbose) + { + PRINT ("["); + print_uint64_hex (rdm, dis); + PRINT ("]"); + } + break; + case 'N': + ns = next (rdm); + if (!ISLOWER (ns) && !ISUPPER (ns)) + { + rdm->errored = 1; + return; + } + + demangle_path (rdm, in_value); + + dis = parse_disambiguator (rdm); + name = parse_ident (rdm); + + if (ISUPPER (ns)) + { + /* Special namespaces, like closures and shims. */ + PRINT ("::{"); + switch (ns) + { + case 'C': + PRINT ("closure"); + break; + case 'S': + PRINT ("shim"); + break; + default: + print_str (rdm, &ns, 1); + } + if (name.ascii || name.punycode) + { + PRINT (":"); + print_ident (rdm, name); + } + PRINT ("#"); + print_uint64 (rdm, dis); + PRINT ("}"); + } + else + { + /* Implementation-specific/unspecified namespaces. */ + + if (name.ascii || name.punycode) + { + PRINT ("::"); + print_ident (rdm, name); + } + } + break; + case 'M': + case 'X': + /* Ignore the `impl`'s own path.*/ + parse_disambiguator (rdm); + was_skipping_printing = rdm->skipping_printing; + rdm->skipping_printing = 1; + demangle_path (rdm, in_value); + rdm->skipping_printing = was_skipping_printing; + /* fallthrough */ + case 'Y': + PRINT ("<"); + demangle_type (rdm); + if (tag != 'M') + { + PRINT (" as "); + demangle_path (rdm, 0); + } + PRINT (">"); + break; + case 'I': + demangle_path (rdm, in_value); + if (in_value) + PRINT ("::"); + PRINT ("<"); + for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) + { + if (i > 0) + PRINT (", "); + demangle_generic_arg (rdm); + } + PRINT (">"); + break; + case 'B': + backref = parse_integer_62 (rdm); + if (!rdm->skipping_printing) + { + old_next = rdm->next; + rdm->next = backref; + demangle_path (rdm, in_value); + rdm->next = old_next; + } + break; + default: + rdm->errored = 1; + return; + } +} + +static void +demangle_generic_arg (struct rust_demangler *rdm) +{ + uint64_t lt; + if (eat (rdm, 'L')) + { + lt = parse_integer_62 (rdm); + print_lifetime_from_index (rdm, lt); + } + else if (eat (rdm, 'K')) + demangle_const (rdm); + else + demangle_type (rdm); +} + +static const char * +basic_type (char tag) +{ + switch (tag) + { + case 'b': + return "bool"; + case 'c': + return "char"; + case 'e': + return "str"; + case 'u': + return "()"; + case 'a': + return "i8"; + case 's': + return "i16"; + case 'l': + return "i32"; + case 'x': + return "i64"; + case 'n': + return "i128"; + case 'i': + return "isize"; + case 'h': + return "u8"; + case 't': + return "u16"; + case 'm': + return "u32"; + case 'y': + return "u64"; + case 'o': + return "u128"; + case 'j': + return "usize"; + case 'f': + return "f32"; + case 'd': + return "f64"; + case 'z': + return "!"; + case 'p': + return "_"; + case 'v': + return "..."; + + default: + return NULL; + } +} + +static void +demangle_type (struct rust_demangler *rdm) +{ + char tag; + size_t i, old_next, backref; + uint64_t lt, old_bound_lifetime_depth; + const char *basic; + struct rust_mangled_ident abi; + + if (rdm->errored) + return; + + tag = next (rdm); + + basic = basic_type (tag); + if (basic) + { + PRINT (basic); + return; + } + + switch (tag) + { + case 'R': + case 'Q': + PRINT ("&"); + if (eat (rdm, 'L')) + { + lt = parse_integer_62 (rdm); + if (lt) + { + print_lifetime_from_index (rdm, lt); + PRINT (" "); + } + } + if (tag != 'R') + PRINT ("mut "); + demangle_type (rdm); + break; + case 'P': + case 'O': + PRINT ("*"); + if (tag != 'P') + PRINT ("mut "); + else + PRINT ("const "); + demangle_type (rdm); + break; + case 'A': + case 'S': + PRINT ("["); + demangle_type (rdm); + if (tag == 'A') + { + PRINT ("; "); + demangle_const (rdm); + } + PRINT ("]"); + break; + case 'T': + PRINT ("("); + for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) + { + if (i > 0) + PRINT (", "); + demangle_type (rdm); + } + if (i == 1) + PRINT (","); + PRINT (")"); + break; + case 'F': + old_bound_lifetime_depth = rdm->bound_lifetime_depth; + demangle_binder (rdm); + + if (eat (rdm, 'U')) + PRINT ("unsafe "); + + if (eat (rdm, 'K')) + { + if (eat (rdm, 'C')) + { + abi.ascii = "C"; + abi.ascii_len = 1; + } + else + { + abi = parse_ident (rdm); + if (!abi.ascii || abi.punycode) + { + rdm->errored = 1; + goto restore; + } + } + + PRINT ("extern \""); + + /* If the ABI had any `-`, they were replaced with `_`, + so the parts between `_` have to be re-joined with `-`. */ + for (i = 0; i < abi.ascii_len; i++) + { + if (abi.ascii[i] == '_') + { + print_str (rdm, abi.ascii, i); + PRINT ("-"); + abi.ascii += i + 1; + abi.ascii_len -= i + 1; + i = 0; + } + } + print_str (rdm, abi.ascii, abi.ascii_len); + + PRINT ("\" "); + } + + PRINT ("fn("); + for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) + { + if (i > 0) + PRINT (", "); + demangle_type (rdm); + } + PRINT (")"); + + if (eat (rdm, 'u')) + { + /* Skip printing the return type if it's 'u', i.e. `()`. */ + } + else + { + PRINT (" -> "); + demangle_type (rdm); + } + + /* Restore `bound_lifetime_depth` to outside the binder. */ + restore: + rdm->bound_lifetime_depth = old_bound_lifetime_depth; + break; + case 'D': + PRINT ("dyn "); + + old_bound_lifetime_depth = rdm->bound_lifetime_depth; + demangle_binder (rdm); + + for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) + { + if (i > 0) + PRINT (" + "); + demangle_dyn_trait (rdm); + } + + /* Restore `bound_lifetime_depth` to outside the binder. */ + rdm->bound_lifetime_depth = old_bound_lifetime_depth; + + if (!eat (rdm, 'L')) + { + rdm->errored = 1; + return; + } + lt = parse_integer_62 (rdm); + if (lt) + { + PRINT (" + "); + print_lifetime_from_index (rdm, lt); + } + break; + case 'B': + backref = parse_integer_62 (rdm); + if (!rdm->skipping_printing) + { + old_next = rdm->next; + rdm->next = backref; + demangle_type (rdm); + rdm->next = old_next; + } + break; + default: + /* Go back to the tag, so `demangle_path` also sees it. */ + rdm->next--; + demangle_path (rdm, 0); + } +} + +/* A trait in a trait object may have some "existential projections" + (i.e. associated type bindings) after it, which should be printed + in the `<...>` of the trait, e.g. `dyn Trait`. + To this end, this method will keep the `<...>` of an 'I' path + open, by omitting the `>`, and return `Ok(true)` in that case. */ +static int +demangle_path_maybe_open_generics (struct rust_demangler *rdm) +{ + int open; + size_t i, old_next, backref; + + open = 0; + + if (rdm->errored) + return open; + + if (eat (rdm, 'B')) + { + backref = parse_integer_62 (rdm); + if (!rdm->skipping_printing) + { + old_next = rdm->next; + rdm->next = backref; + open = demangle_path_maybe_open_generics (rdm); + rdm->next = old_next; + } + } + else if (eat (rdm, 'I')) + { + demangle_path (rdm, 0); + PRINT ("<"); + open = 1; + for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) + { + if (i > 0) + PRINT (", "); + demangle_generic_arg (rdm); + } + } + else + demangle_path (rdm, 0); + return open; +} + +static void +demangle_dyn_trait (struct rust_demangler *rdm) +{ + int open; + struct rust_mangled_ident name; + + if (rdm->errored) + return; + + open = demangle_path_maybe_open_generics (rdm); + + while (eat (rdm, 'p')) + { + if (!open) + PRINT ("<"); + else + PRINT (", "); + open = 1; + + name = parse_ident (rdm); + print_ident (rdm, name); + PRINT (" = "); + demangle_type (rdm); + } + + if (open) + PRINT (">"); +} + +static void +demangle_const (struct rust_demangler *rdm) +{ + char ty_tag; + size_t old_next, backref; + + if (rdm->errored) + return; + + if (eat (rdm, 'B')) + { + backref = parse_integer_62 (rdm); + if (!rdm->skipping_printing) + { + old_next = rdm->next; + rdm->next = backref; + demangle_const (rdm); + rdm->next = old_next; + } + return; + } + + ty_tag = next (rdm); + switch (ty_tag) + { + /* Placeholder. */ + case 'p': + PRINT ("_"); + return; + + /* Unsigned integer types. */ + case 'h': + case 't': + case 'm': + case 'y': + case 'o': + case 'j': + demangle_const_uint (rdm); + break; + + /* Signed integer types. */ + case 'a': + case 's': + case 'l': + case 'x': + case 'n': + case 'i': + demangle_const_int (rdm); + break; + + /* Boolean. */ + case 'b': + demangle_const_bool (rdm); + break; + + /* Character. */ + case 'c': + demangle_const_char (rdm); + break; + + default: + rdm->errored = 1; + return; + } + + if (rdm->errored) + return; + + if (rdm->verbose) + { + PRINT (": "); + PRINT (basic_type (ty_tag)); + } +} + +static void +demangle_const_uint (struct rust_demangler *rdm) +{ + size_t hex_len; + uint64_t value; + + if (rdm->errored) + return; + + hex_len = parse_hex_nibbles (rdm, &value); + + if (hex_len > 16) + { + /* Print anything that doesn't fit in `uint64_t` verbatim. */ + PRINT ("0x"); + print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len); + } + else if (hex_len > 0) + print_uint64 (rdm, value); + else + rdm->errored = 1; +} + +static void +demangle_const_int (struct rust_demangler *rdm) +{ + if (eat (rdm, 'n')) + PRINT ("-"); + demangle_const_uint (rdm); +} + +static void +demangle_const_bool (struct rust_demangler *rdm) +{ + uint64_t value; + + if (parse_hex_nibbles (rdm, &value) != 1) + { + rdm->errored = 1; + return; + } + + if (value == 0) + PRINT ("false"); + else if (value == 1) + PRINT ("true"); + else + rdm->errored = 1; +} + +static void +demangle_const_char (struct rust_demangler *rdm) +{ + size_t hex_len; + uint64_t value; + + hex_len = parse_hex_nibbles (rdm, &value); + + if (hex_len == 0 || hex_len > 8) + { + rdm->errored = 1; + return; + } + + /* Match Rust's character "debug" output as best as we can. */ + PRINT ("'"); + if (value == '\t') + PRINT ("\\t"); + else if (value == '\r') + PRINT ("\\r"); + else if (value == '\n') + PRINT ("\\n"); + else if (value > ' ' && value < '~') + /* Rust also considers many non-ASCII codepoints to be printable, but + that logic is not easily ported to C. */ + print_str (rdm, (char *) &value, 1); + else + { + PRINT ("\\u{"); + print_uint64_hex (rdm, value); + PRINT ("}"); + } + PRINT ("'"); } /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits. @@ -345,11 +1314,15 @@ rust_demangle_callback (const char *mangled, int options, rdm.next = 0; rdm.errored = 0; + rdm.skipping_printing = 0; rdm.verbose = (options & DMGL_VERBOSE) != 0; rdm.version = 0; + rdm.bound_lifetime_depth = 0; - /* Rust symbols always start with _ZN (legacy). */ - if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N') + /* Rust symbols always start with _R (v0) or _ZN (legacy). */ + if (rdm.sym[0] == '_' && rdm.sym[1] == 'R') + rdm.sym += 2; + else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N') { rdm.sym += 3; rdm.version = -1; @@ -357,7 +1330,11 @@ rust_demangle_callback (const char *mangled, int options, else return 0; - /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */ + /* Paths (v0) always start with uppercase characters. */ + if (rdm.version != -1 && !ISUPPER (rdm.sym[0])) + return 0; + + /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */ for (p = rdm.sym; *p; p++) { rdm.sym_len++; @@ -365,6 +1342,7 @@ rust_demangle_callback (const char *mangled, int options, if (*p == '_' || ISALNUM (*p)) continue; + /* Legacy Rust symbols can also contain [.:$] characters. */ if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':')) continue; @@ -418,7 +1396,19 @@ rust_demangle_callback (const char *mangled, int options, while (rdm.next < rdm.sym_len); } else - return 0; + { + demangle_path (&rdm, 1); + + /* Skip instantiating crate. */ + if (!rdm.errored && rdm.next < rdm.sym_len) + { + rdm.skipping_printing = 1; + demangle_path (&rdm, 0); + } + + /* It's an error to not reach the end. */ + rdm.errored |= rdm.next != rdm.sym_len; + } return !rdm.errored; } diff --git a/libiberty/testsuite/rust-demangle-expected b/libiberty/testsuite/rust-demangle-expected index 74774794736..7dca315d005 100644 --- a/libiberty/testsuite/rust-demangle-expected +++ b/libiberty/testsuite/rust-demangle-expected @@ -11,7 +11,7 @@ # ############ # -# Coverage Tests +# Coverage Tests (legacy) # # # Demangles as rust symbol. @@ -163,3 +163,135 @@ _ZN63_$LT$core..ptr..Unique$LT$T$GT$$u20$as$u20$core..ops..Deref$GT$5deref17h19f --format=rust _ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h059a991a004536adE issue_60925::foo::Foo::foo +--format=rust +_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h000b1ad6c4f30bd6E +core::ops::function::FnOnce::call_once{{vtable.shim}} +# +############ +# +# Coverage Tests (v0) +# +# +# Crate with a leading digit. +--format=rust +_RNvC6_123foo3bar +123foo::bar +# UTF-8 identifiers. +--format=rust +_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y +utf8_idents::საჭმელად_გემრიელი_სადილი +# Closure path elements. +--format=rust +_RNCNCNgCs6DXkGYLi8lr_2cc5spawn00B5_ +cc::spawn::{closure#0}::{closure#0} +# +--format=rust +_RNCINkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB9_6memchr7memrchrs_0E0Bb_ + as core::iter::iterator::Iterator>::rposition::::{closure#0} +# dyn Trait ("trait object") types. +--format=rust +_RINbNbCskIICzLVDPPb_5alloc5alloc8box_freeDINbNiB4_5boxed5FnBoxuEp6OutputuEL_ECs1iopQbuBiw2_3std +alloc::alloc::box_free::> +# Types with const generics parameters. +--format=rust +_RNvMC0INtC8arrayvec8ArrayVechKj7b_E3new +>::new +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_8UnsignedKhb_E +> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKs98_E +> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKanb_E +> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb0_E +> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb1_E +> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc76_E +> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKca_E +> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc2202_E +> +# +--format=rust +_RNvNvMCs4fqI2P2rA04_13const_genericINtB4_3FooKpE3foo3FOO +>::foo::FOO +# +# All of the tests above but in auto mode instead: +# +# Crate with a leading digit. +--format=auto +_RNvC6_123foo3bar +123foo::bar +# UTF-8 identifiers. +--format=auto +_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y +utf8_idents::საჭმელად_გემრიელი_სადილი +# Closure path elements. +--format=auto +_RNCNCNgCs6DXkGYLi8lr_2cc5spawn00B5_ +cc::spawn::{closure#0}::{closure#0} +# +--format=auto +_RNCINkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB9_6memchr7memrchrs_0E0Bb_ + as core::iter::iterator::Iterator>::rposition::::{closure#0} +# dyn Trait ("trait object") types. +--format=auto +_RINbNbCskIICzLVDPPb_5alloc5alloc8box_freeDINbNiB4_5boxed5FnBoxuEp6OutputuEL_ECs1iopQbuBiw2_3std +alloc::alloc::box_free::> +# Types with const generics parameters. +--format=auto +_RNvMC0INtC8arrayvec8ArrayVechKj7b_E3new +>::new +# +--format=auto +_RMCs4fqI2P2rA04_13const_genericINtB0_8UnsignedKhb_E +> +# +--format=auto +_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKs98_E +> +# +--format=auto +_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKanb_E +> +# +--format=auto +_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb0_E +> +# +--format=auto +_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb1_E +> +# +--format=auto +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc76_E +> +# +--format=auto +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKca_E +> +# +--format=auto +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc2202_E +> +# +--format=auto +_RNvNvMCs4fqI2P2rA04_13const_genericINtB4_3FooKpE3foo3FOO +>::foo::FOO -- 2.30.2