From 211bcd013331fcaefa9bb31b6145ae0df13a3363 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Tue, 2 Mar 2021 15:10:05 +0000 Subject: [PATCH] bfd, ld, libctf: skip zero-refcount strings in CTF string reporting This is a tricky one. BFD, on the linker's behalf, reports symbols to libctf via the ctf_new_symbol and ctf_new_dynsym callbacks, which ultimately call ctf_link_add_linker_symbol. But while this happens after strtab offsets are finalized, it happens before the .dynstr is actually laid out, so we can't iterate over it at this stage and it is not clear what the reported symbols are actually called. So a second callback, examine_strtab, is called after the .dynstr is finalized, which calls ctf_link_add_strtab and ultimately leads to ldelf_ctf_strtab_iter_cb being called back repeatedly until the offset of every string in the .dynstr is passed to libctf. libctf can then use this to get symbol names out of the input (which usually stores symbol types in the form of a name -> type mapping at this stage) and extract the types of those symbols, feeding them back into their final form as a 1:1 association with the real symtab's STT_OBJECT and STT_FUNC symbols (with a few skipped, see ctf_symtab_skippable). This representation is compact, but has one problem: if libctf somehow gets confused about the st_type of a symbol, it'll stick an entry into the function symtypetab when it should put it into the object symtypetab, or vice versa, and *every symbol from that one on* will have the wrong CTF type because it's actually looking up the type for a different symbol. And we have just such a bug. ctf_link_add_strtab was not taking the refcounts of strings into consideration, so even strings that had been eliminated from the strtab by virtue of being in objects eliminated via --as-needed, etc., were being reported. 
This is harmful because it can lead to multiple strings with the same apparent offset, and if the last duplicate to be reported relates to an eliminated symbol, we look up the wrong symbol from the input and get its type wrong: if we are unlucky and the eliminated symbol is also of the wrong st_type, we will end up with a corrupted symtypetab. Thankfully the wrong-st_type case is already diagnosed by a this-can-never-happen paranoid warning: "CTF warning: Symbol 61a added to CTF as a function but is of type 1" or the converse, "CTF warning: Symbol a3 added to CTF as a data object but is of type 2", so at least we can tell when the corruption has spread to more than one symbol's type. Skipping zero-refcounted strings is easy: teach _bfd_elf_strtab_str to skip them, and ldelf_ctf_strtab_iter_cb to loop over skipped strings until it falls off the end or finds one that isn't skipped. bfd/ChangeLog 2021-03-02 Nick Alcock * elf-strtab.c (_bfd_elf_strtab_str): Skip strings with zero refcount. ld/ChangeLog 2021-03-02 Nick Alcock * ldelfgen.c (ldelf_ctf_strtab_iter_cb): Skip zero-refcount strings. libctf/ChangeLog 2021-03-02 Nick Alcock * ctf-create.c (symtypetab_density): Report the symbol name as well as index in the name != object error; note the likely consequences. * ctf-link.c (ctf_link_shuffle_syms): Report the symbol index as well as name. --- bfd/ChangeLog | 4 ++++ bfd/elf-strtab.c | 4 +++- ld/ChangeLog | 4 ++++ ld/ldelfgen.c | 15 +++++++++++---- libctf/ChangeLog | 8 ++++++++ libctf/ctf-create.c | 16 ++++++++++------ libctf/ctf-link.c | 3 ++- 7 files changed, 42 insertions(+), 12 deletions(-) diff --git a/bfd/ChangeLog b/bfd/ChangeLog index c4c4fa2647a..c56cfc49383 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,7 @@ +2021-03-02 Nick Alcock + + * elf-strtab.c (_bfd_elf_strtab_str): Skip strings with zero refcount. 
+ 2021-03-02 Alan Modra PR 27451 diff --git a/bfd/elf-strtab.c b/bfd/elf-strtab.c index 3e5e1622109..a3cb4ef7b4a 100644 --- a/bfd/elf-strtab.c +++ b/bfd/elf-strtab.c @@ -299,9 +299,11 @@ _bfd_elf_strtab_str (struct elf_strtab_hash *tab, size_t idx, bfd_size_type *offset) { if (idx == 0) - return 0; + return NULL; BFD_ASSERT (idx < tab->size); BFD_ASSERT (tab->sec_size); + if (tab->array[idx]->refcount == 0) + return NULL; if (offset) *offset = tab->array[idx]->u.index; return tab->array[idx]->root.string; diff --git a/ld/ChangeLog b/ld/ChangeLog index d12e537791a..9116ee01170 100644 --- a/ld/ChangeLog +++ b/ld/ChangeLog @@ -1,3 +1,7 @@ +2021-03-02 Nick Alcock + + * ldelfgen.c (ldelf_ctf_strtab_iter_cb): Skip zero-refcount strings. + 2021-03-02 Alan Modra * testsuite/ld-powerpc/startstop.d, diff --git a/ld/ldelfgen.c b/ld/ldelfgen.c index df3dae0abe8..c49b85494ee 100644 --- a/ld/ldelfgen.c +++ b/ld/ldelfgen.c @@ -375,13 +375,20 @@ ldelf_ctf_strtab_iter_cb (uint32_t *offset, void *arg_) if (arg->next_i == 0) arg->next_i = 1; - if (arg->next_i >= _bfd_elf_strtab_len (arg->strtab)) + /* Hunt through strings until we fall off the end or find one with + a nonzero refcount. */ + do { - arg->next_i = 0; - return NULL; + if (arg->next_i >= _bfd_elf_strtab_len (arg->strtab)) + { + arg->next_i = 0; + return NULL; + } + + ret = _bfd_elf_strtab_str (arg->strtab, arg->next_i++, &off); } + while (ret == NULL); - ret = _bfd_elf_strtab_str (arg->strtab, arg->next_i++, &off); *offset = off; /* If we've overflowed, we cannot share any further strings: the CTF diff --git a/libctf/ChangeLog b/libctf/ChangeLog index 8fa98c68a82..f095c3e1e30 100644 --- a/libctf/ChangeLog +++ b/libctf/ChangeLog @@ -1,3 +1,11 @@ +2021-03-02 Nick Alcock + + * ctf-create.c (symtypetab_density): Report the symbol name as + well as index in the name != object error; note the likely + consequences. + * ctf-link.c (ctf_link_shuffle_syms): Report the symbol index + as well as name. 
+ 2021-03-02 Nick Alcock * ctf-link.c (ctf_link_shuffle_syms): Free ctf_dynsyms properly. diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c index d417922e7fd..3f2c5dacb03 100644 --- a/libctf/ctf-create.c +++ b/libctf/ctf-create.c @@ -318,18 +318,22 @@ symtypetab_density (ctf_dict_t *fp, ctf_dict_t *symfp, ctf_dynhash_t *symhash, if ((flags & CTF_SYMTYPETAB_EMIT_FUNCTION) && sym->st_type != STT_FUNC) { - ctf_err_warn (fp, 1, 0, _("Symbol %x added to CTF as a function " - "but is of type %x\n"), - sym->st_symidx, sym->st_type); + ctf_err_warn (fp, 1, 0, _("symbol %s (%x) added to CTF as a " + "function but is of type %x. " + "The symbol type lookup tables " + "are probably corrupted"), + sym->st_name, sym->st_symidx, sym->st_type); ctf_dynhash_remove (symhash, name); continue; } else if (!(flags & CTF_SYMTYPETAB_EMIT_FUNCTION) && sym->st_type != STT_OBJECT) { - ctf_err_warn (fp, 1, 0, _("Symbol %x added to CTF as a data " - "object but is of type %x\n"), - sym->st_symidx, sym->st_type); + ctf_err_warn (fp, 1, 0, _("symbol %s (%x) added to CTF as a " + "data object but is of type %x. " + "The symbol type lookup tables " + "are probably corrupted"), + sym->st_name, sym->st_symidx, sym->st_type); ctf_dynhash_remove (symhash, name); continue; } diff --git a/libctf/ctf-link.c b/libctf/ctf-link.c index 882d4297e4f..5471fccd0f7 100644 --- a/libctf/ctf-link.c +++ b/libctf/ctf-link.c @@ -1552,7 +1552,8 @@ ctf_link_shuffle_syms (ctf_dict_t *fp) for skippability here. */ if (!ctf_symtab_skippable (&did->cid_sym)) { - ctf_dprintf ("symbol name from linker: %s\n", did->cid_sym.st_name); + ctf_dprintf ("symbol from linker: %s (%x)\n", did->cid_sym.st_name, + did->cid_sym.st_symidx); if ((new_sym = malloc (sizeof (ctf_link_sym_t))) == NULL) goto local_oom; -- 2.30.2