From: Andi Kleen Date: Thu, 23 Dec 2021 17:55:07 +0000 (-0800) Subject: Support symbol+offset lookup in addr2line X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fd3c53675c01472caaa4eb976781b9d6d2d3c53d;p=binutils-gdb.git Support symbol+offset lookup in addr2line The Linux kernel usually outputs symbol+offset instead of plain code addresses these days, to avoid leaking ASLR secrets and to handle dynamically loaded modules. Converting those with addr2line is somewhat involved: it requires looking up the symbol first using nm and then manually computing the offset, and then passing it to addr2line. This patch implements the necessary steps directly in addr2line, by looking up the symbol (with demangling if needed) and computing the offset. It's possible that a symbol is ambiguous with a hex number. In this case it uses the symbol lookup if the string contains a +. When it isn't ambiguous the + is optional. --- diff --git a/binutils/addr2line.c b/binutils/addr2line.c index 4d0405bbd09..fd2ac80dacd 100644 --- a/binutils/addr2line.c +++ b/binutils/addr2line.c @@ -37,6 +37,7 @@ #include "demangle.h" #include "bucomm.h" #include "elf-bfd.h" +#include "safe-ctype.h" static bool unwind_inlines; /* -i, unwind inlined functions. */ static bool with_addresses; /* -a, show addresses. */ @@ -51,6 +52,7 @@ static int demangle_flags = DMGL_PARAMS | DMGL_ANSI; static int naddr; /* Number of addresses to process. */ static char **addr; /* Hex addresses to process. */ +static long symcount; static asymbol **syms; /* Symbol table. */ static struct option long_options[] = @@ -116,7 +118,6 @@ static void slurp_symtab (bfd *abfd) { long storage; - long symcount; bool dynamic = false; if ((bfd_get_file_flags (abfd) & HAS_SYMS) == 0) @@ -220,32 +221,94 @@ find_offset_in_section (bfd *abfd, asection *section) &line, &discriminator); } -/* Read hexadecimal addresses from stdin, translate into +/* Lookup a symbol with offset in symbol table. 
*/ + +static bfd_vma +lookup_symbol (bfd *abfd, char *sym, size_t offset) +{ + long i; + /* First pass: compare SYM against each raw symbol table name. */ + for (i = 0; i < symcount; i++) + { + if (!strcmp (syms[i]->name, sym)) + return syms[i]->value + offset + bfd_asymbol_section (syms[i])->vma; + } + /* Second pass: try again, comparing SYM against the demangled form of each name. The string returned by bfd_demangle is freed right after the comparison. */ + for (i = 0; i < symcount; i++) + { + char *d = bfd_demangle (abfd, syms[i]->name, demangle_flags); + bool match = d && !strcmp (d, sym); + free (d); + + if (match) + return syms[i]->value + offset + bfd_asymbol_section (syms[i])->vma; + } + return 0; /* No name matched in either pass. NOTE(review): this 0 cannot be told apart from a symbol that really resolves to address 0. */ +} + +/* Split a symbol+offset expression. Leading whitespace in ADR is skipped. Returns false when the remainder is empty or starts with a digit, or when it starts with a hex letter and contains no '+'. Otherwise returns true, stores the start of the symbol name in *SYMP and the parsed offset in *OFFSET (0 when no '+' follows the name). ADR is modified: a NUL is written at the end of the symbol name. */ + +static bool +is_symbol (char *adr, char **symp, size_t *offset) +{ + char *end; + + while (ISSPACE (*adr)) + adr++; + if (ISDIGIT (*adr) || *adr == 0) + return false; + /* Could be either symbol or hex number. Check if it has +. NOTE(review): only the first character is tested, so any name that begins with a-f or A-F and has no '+' is reported as not a symbol -- confirm this is intended. */ + if (TOUPPER(*adr) >= 'A' && TOUPPER(*adr) <= 'F' && !strchr (adr, '+')) + return false; + /* Treat ADR as a symbol: the name runs up to whitespace, '+' or the end of the string. */ + *symp = adr; + while (*adr && !ISSPACE (*adr) && *adr != '+') + adr++; + end = adr; + while (ISSPACE (*adr)) + adr++; + *offset = 0; + if (*adr == '+') + { + adr++; + *offset = strtoul(adr, NULL, 0); /* Base 0, so the 0x and 0 prefixes select hex and octal; the end pointer is not requested, so trailing text is ignored. */ + } + *end = 0; /* Terminate the symbol name in place. */ + return true; +} + +/* Read hexadecimal or symbolic with offset addresses from stdin, translate into file_name:line_number and optionally function name. 
*/ static void translate_addresses (bfd *abfd, asection *section) { int read_stdin = (naddr == 0); + char *adr; + char addr_hex[100]; + char *symp; + size_t offset; for (;;) { if (read_stdin) { - char addr_hex[100]; - if (fgets (addr_hex, sizeof addr_hex, stdin) == NULL) break; - pc = bfd_scan_vma (addr_hex, NULL, 16); + adr = addr_hex; } else { if (naddr <= 0) break; --naddr; - pc = bfd_scan_vma (*addr++, NULL, 16); + adr = *addr++; } + if (is_symbol (adr, &symp, &offset)) + pc = lookup_symbol (abfd, symp, offset); + else + pc = bfd_scan_vma (adr, NULL, 16); if (bfd_get_flavour (abfd) == bfd_target_elf_flavour) { const struct elf_backend_data *bed = get_elf_backend_data (abfd); diff --git a/binutils/doc/binutils.texi b/binutils/doc/binutils.texi index 54845c5dc08..288974be386 100644 --- a/binutils/doc/binutils.texi +++ b/binutils/doc/binutils.texi @@ -118,7 +118,7 @@ Demangle encoded C++ symbols (on MS-DOS, this program is named @code{cxxfilt}) @item addr2line -Convert addresses into file names and line numbers +Convert addresses or symbol+offset into file names and line numbers @item windres Manipulate Windows resources @@ -146,7 +146,7 @@ in the section entitled ``GNU Free Documentation License''. * strip:: Discard symbols * c++filt:: Filter to demangle encoded C++ symbols * cxxfilt: c++filt. 
MS-DOS name for c++filt -* addr2line:: Convert addresses to file and line +* addr2line:: Convert addresses or symbol+offset to file and line * windmc:: Generator for Windows message resources * windres:: Manipulate Windows resources * dlltool:: Create files needed to build and use DLLs @@ -3902,7 +3902,7 @@ c++filt @var{option} @var{symbol} @kindex addr2line @cindex address to file name and line number -@c man title addr2line convert addresses into file names and line numbers +@c man title addr2line convert addresses or symbol+offset into file names and line numbers @smallexample @c man begin SYNOPSIS addr2line @@ -3923,8 +3923,8 @@ addr2line [@option{-a}|@option{--addresses}] @c man begin DESCRIPTION addr2line -@command{addr2line} translates addresses into file names and line numbers. -Given an address in an executable or an offset in a section of a relocatable +@command{addr2line} translates addresses or symbol+offset into file names and line numbers. +Given an address or symbol+offset in an executable or an offset in a section of a relocatable object, it uses the debugging information to figure out which file name and line number are associated with it. @@ -3934,11 +3934,11 @@ object to use is specified with the @option{-j} option. @command{addr2line} has two modes of operation. -In the first, hexadecimal addresses are specified on the command line, +In the first, hexadecimal addresses or symbol+offset are specified on the command line, and @command{addr2line} displays the file name and line number for each address. -In the second, @command{addr2line} reads hexadecimal addresses from +In the second, @command{addr2line} reads hexadecimal addresses or symbol+offset from standard input, and prints the file name and line number for each address on standard output. In this mode, @command{addr2line} may be used in a pipe to convert dynamically chosen addresses. 
@@ -3975,6 +3975,10 @@ If the file name or function name can not be determined, @command{addr2line} will print two question marks in their place. If the line number can not be determined, @command{addr2line} will print 0. +When symbol+offset is used, +offset is optional, except when the symbol +is ambiguous with a hex number. The resolved symbols can be mangled +or unmangled, except unmangled symbols with + are not allowed. + @c man end @c man begin OPTIONS addr2line