Added dwarf_decode_address.py example
author Eli Bendersky <eliben@gmail.com>
Thu, 5 Jul 2012 03:32:09 +0000 (06:32 +0300)
committer Eli Bendersky <eliben@gmail.com>
Thu, 5 Jul 2012 03:32:09 +0000 (06:32 +0300)
CHANGES
elftools/common/py3compat.py
examples/dwarf_decode_address.py [new file with mode: 0644]
examples/reference_output/dwarf_decode_address.out [new file with mode: 0644]

diff --git a/CHANGES b/CHANGES
index 0e52284b5c598741ee990a22caf5d0b6e9cdc23f..dc64e660e8590b7b29a2994128ae922e4b71cabb 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,11 @@
 Changelog
 =========
 
++ Version 0.21 (??)
+
+  - Added new example: dwarf_decode_address - decode function name and
+    file & line information from an address.
+
 + Version 0.20 (27.01.2012)
 
   - Python 3 support
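diff --git a/elftools/common/py3compat.py b/elftools/common/py3compat.py
index 9b4529f38d45cc1928d3cd31d15eb1873f508993..d57567d1f7a2a92d8a3963bc43a33e410c62134a 100644 (file)
--- a/elftools/common/py3compat.py
+++ b/elftools/common/py3compat.py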
@@ -28,6 +28,8 @@ if PY3:
     def byte2int(b): return b
 
     ifilter = filter
+
+    maxint = sys.maxsize
 else:
     import cStringIO
     StringIO = BytesIO = cStringIO.StringIO
@@ -45,6 +47,8 @@ else:
 
     from itertools import ifilter
 
+    maxint = sys.maxint
+
 
 def iterkeys(d):
     """Return an iterator over the keys of a dictionary."""
diff --git a/examples/dwarf_decode_address.py b/examples/dwarf_decode_address.py
new file mode 100644 (file)
index 0000000..831b4fc
--- /dev/null
@@ -0,0 +1,86 @@
+#-------------------------------------------------------------------------------
+# elftools example: dwarf_decode_address.py
+#
+# Decode an address in an ELF file to find out which function it belongs to
+# and from which filename/line it comes in the original source file.
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+from __future__ import print_function
+import sys
+
+# If elftools is not installed, maybe we're running from the root or examples
+# dir of the source distribution
+try:
+    import elftools
+except ImportError:
+    sys.path.extend(['.', '..'])
+
+from elftools.common.py3compat import maxint, bytes2str
+from elftools.elf.elffile import ELFFile
+
+
+def process_file(filename, address):
+    # Print the function name, source file and line that address decodes to
+    # in the ELF file filename. Parts that cannot be decoded print as None.
+    print('Processing file:', filename)
+    with open(filename, 'rb') as f:
+        elffile = ELFFile(f)
+        if not elffile.has_dwarf_info():
+            print('  file has no DWARF info')
+            return
+
+        # get_dwarf_info returns a DWARFInfo context object, which is the
+        # starting point for all DWARF-based processing in pyelftools.
+        dwarfinfo = elffile.get_dwarf_info()
+        funcname = decode_funcname(dwarfinfo, address)
+        srcfile, line = decode_file_line(dwarfinfo, address)
+        # The decoders return None when nothing matches; only convert names.
+        for label, value in (('Function:', funcname), ('File:', srcfile)):
+            print(label, value if value is None else bytes2str(value))
+        print('Line:', line)
+
+
+def decode_funcname(dwarfinfo, address):
+    # Return the DW_AT_name of the first DW_TAG_subprogram DIE for which
+    # DW_AT_low_pc <= address < DW_AT_high_pc, or None if there is none.
+    # high_pc (taken to be an address) is the first location past the
+    # subprogram, so it is exclusive. Split address ranges are disregarded.
+    for CU in dwarfinfo.iter_CUs():
+        for DIE in CU.iter_DIEs():
+            try:
+                if DIE.tag == 'DW_TAG_subprogram':
+                    lowpc = DIE.attributes['DW_AT_low_pc'].value
+                    highpc = DIE.attributes['DW_AT_high_pc'].value
+                    if lowpc <= address < highpc:
+                        return DIE.attributes['DW_AT_name'].value
+            except KeyError:
+                continue
+    return None
+
+
+def decode_file_line(dwarfinfo, address):
+    # Return (file name, line) for the line program row that covers address,
+    # or (None, None). A row covers the addresses from its own address up to,
+    # but excluding, the address of the next row in the same sequence.
+    for CU in dwarfinfo.iter_CUs():
+        lineprog = dwarfinfo.line_program_for_CU(CU)
+        # Most recent row of the sequence being walked, if any.
+        prev = None
+        for entry in lineprog.get_entries():
+            state = entry.state
+            if state is None:
+                continue
+            if prev is not None and prev.address <= address < state.address:
+                return lineprog['file_entry'][prev.file - 1].name, prev.line
+            # An end_sequence row closes the last range and starts none itself.
+            prev = None if state.end_sequence else state
+    return None, None
+
+
+if __name__ == '__main__':
+    # Each argument is a file to decode; the address is fixed for testing.
+    for elf_path in sys.argv[1:]:
+        process_file(elf_path, address=0x400503)
+
diff --git a/examples/reference_output/dwarf_decode_address.out b/examples/reference_output/dwarf_decode_address.out
new file mode 100644 (file)
index 0000000..73ca9ee
--- /dev/null
@@ -0,0 +1,4 @@
+Processing file: ./examples/sample_exe64.elf
+Function: main
+File: z.c
+Line: 4