#-------------------------------------------------------------------------------
# elftools example: dwarf_lineprogram_filenames.py
#
# In the .debug_line section, the Dwarf line program generates a matrix
# of address-source references. This example demonstrates accessing the state
# of each line program entry to retrieve the underlying filenames.
#
# Usage:
#   python3 ./dwarf_lineprogram_filenames.py --test x.elf y.elf z.elf
#
# Reference output for ./examples/sample_exe64.elf
# (examples/reference_output/dwarf_lineprogram_filenames.out):
#
#   Processing file: ./examples/sample_exe64.elf
#    Found a compile unit at offset 0, length 115
#    filename=../sysdeps/x86_64/elf/start.S -> 13 entries
#    Found a compile unit at offset 119, length 135
#    Found a compile unit at offset 258, length 156
#    filename=z.c -> 5 entries
#    Found a compile unit at offset 418, length 300
#    filename=elf-init.c -> 15 entries
#
# Origin: commit 934345e26369527e8936e716b77f1739d78c1ea2,
# "examples: Add dwarf_lineprogram_filenames.py (#285)", which adds an
# example of the operation discussed in #283.
#
# William Woodruff (william@yossarian.net)
# This code is in the public domain
#-------------------------------------------------------------------------------
from __future__ import print_function
from collections import defaultdict
import os
import posixpath
import sys

# If pyelftools is not installed, the example can also run from the root or
# examples/ dir of the source distribution.
sys.path[0:0] = ['.', '..']


def process_file(filename):
    """Print a filename -> #entries summary for each compile unit of an ELF.

    filename:
        Path of the ELF file to open (read in binary mode).

    Nothing is returned; all results are written to stdout.
    """
    # Imported here, after the sys.path tweak above has run, so that the
    # source-tree fallback applies and so that the helper functions below
    # remain importable even when pyelftools itself is unavailable.
    from elftools.elf.elffile import ELFFile

    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        dwarfinfo = elffile.get_dwarf_info()
        for CU in dwarfinfo.iter_CUs():
            print(' Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            # Every compilation unit in the DWARF information may or may not
            # have a corresponding line program in .debug_line.
            line_program = dwarfinfo.line_program_for_CU(CU)
            if line_program is None:
                print(' DWARF info is missing a line program for this CU')
                continue

            # Print a reverse mapping of filename -> #entries
            line_entry_mapping(line_program)


def line_entry_mapping(line_program):
    """Print how many line program entries refer to each source filename.

    line_program:
        An object providing get_entries() and a header, as returned by
        dwarfinfo.line_program_for_CU() in process_file.

    One line per distinct filename is written to stdout, in the order the
    filenames are first encountered.
    """
    filename_map = defaultdict(int)

    # The line program, when decoded, returns a list of line program
    # entries. Each entry contains a state, which we'll use to build
    # a reverse mapping of filename -> #entries.
    lp_entries = line_program.get_entries()
    for lpe in lp_entries:
        # We skip LPEs that don't have an associated file.
        # This can happen if instructions in the compiled binary
        # don't correspond directly to any original source file.
        if not lpe.state or lpe.state.file == 0:
            continue
        filename = lpe_filename(line_program, lpe.state.file)
        filename_map[filename] += 1

    for filename, lpe_count in filename_map.items():
        print(" filename=%s -> %d entries" % (filename, lpe_count))


def lpe_filename(line_program, file_index):
    """Return the filename (as text) for a 1-based line program file index.

    line_program:
        Line program whose header holds the "file_entry" and
        "include_directory" tables.

    file_index:
        1-based index into the header's "file_entry" table. Callers are
        expected to filter out index 0 beforehand (line_entry_mapping does).

    Returns the directory-qualified filename when the file entry refers to
    an include directory, otherwise just the recorded base name. The result
    may still be a relative path if the recorded directory is relative.
    """
    # Retrieving the filename associated with a line program entry
    # involves two levels of indirection: we take the file index from
    # the LPE to grab the file_entry from the line program header,
    # then take the directory index from the file_entry to grab the
    # directory name from the line program header. Finally, we
    # join the (base) filename from the file_entry to the directory
    # name to get the full filename.
    lp_header = line_program.header
    file_entries = lp_header["file_entry"]

    # File and directory indices are 1-indexed.
    # NOTE(review): DWARF 5 numbers these tables from 0; this helper assumes
    # the older 1-based convention -- confirm before using it on DWARF 5
    # line programs.
    file_entry = file_entries[file_index - 1]
    dir_index = file_entry["dir_index"]

    # A dir_index of 0 indicates that no absolute directory was recorded during
    # compilation; return just the basename.
    if dir_index == 0:
        return file_entry.name.decode()

    directory = lp_header["include_directory"][dir_index - 1]
    # posixpath (not os.path) keeps '/' as the separator on every host, so
    # the output matches the reference output on Windows as well.
    return posixpath.join(directory, file_entry.name).decode()


if __name__ == '__main__':
    # Compare a slice instead of indexing sys.argv[1] directly: the slice is
    # simply empty when the script is started without any arguments, whereas
    # the index would raise IndexError.
    if sys.argv[1:2] == ['--test']:
        for filename in sys.argv[2:]:
            process_file(filename)
    else:
        print('Usage: %s --test <elf-file> [<elf-file> ...]' % sys.argv[0],
              file=sys.stderr)