From f9f793d6cce9dc3a80bb138dcf5b789e0916d121 Mon Sep 17 00:00:00 2001
From: Dorothy Chen <dochen@princeton.edu>
Date: Fri, 1 Jul 2016 16:44:36 -0400
Subject: [PATCH] parsed .debug_aranges section, simple interface mapping
 address ranges to CU offsets

fixed bug in finding correct arange entry, given an addr

cosmetic changes per pull request thread #108, and updated relevant tests

minor arange test changes

filled in comments

update test formatting for aranges

update test formatting for aranges

Fix off-by-one bug

For real this time. If the query addr == beginning address, bisect_left won't work.
---
 elftools/dwarf/aranges.py   | 110 ++++++++++++++++++++++++++++++++++++
 elftools/dwarf/dwarfinfo.py |  14 +++++
 elftools/dwarf/structs.py   |  10 ++++
 elftools/elf/elffile.py     |   5 +-
 scripts/readelf.py          |  40 +++++++++++++
 test/run_readelf_tests.py   |  14 +++--
 6 files changed, 186 insertions(+), 7 deletions(-)
 create mode 100644 elftools/dwarf/aranges.py

diff --git a/elftools/dwarf/aranges.py b/elftools/dwarf/aranges.py
new file mode 100644
index 0000000..32c287d
--- /dev/null
+++ b/elftools/dwarf/aranges.py
@@ -0,0 +1,110 @@
+#-------------------------------------------------------------------------------
+# elftools: dwarf/aranges.py
+#
+# DWARF aranges section decoding (.debug_aranges)
+#
+# Dorothy Chen (dorothchen@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+import os
+from collections import namedtuple
+from ..common.utils import struct_parse
+from bisect import bisect_right
+import math
+
+# An entry in the aranges table; 
+# begin_addr: The beginning address in the CU
+# length: The length of the address range in this entry
+# info_offset: The CU's offset into .debug_info
+# see 6.1.2 in DWARF4 docs for explanation of the remaining fields
+ARangeEntry = namedtuple('ARangeEntry', 
+    'begin_addr length info_offset unit_length version address_size segment_size')
+
+class ARanges(object):
+    """ ARanges table in DWARF
+
+        stream, size: 
+            A stream holding the .debug_aranges section, and its size
+
+        structs: 
+            A DWARFStructs instance for parsing the data
+    """
+    def __init__(self, stream, size, structs):
+        self.stream = stream
+        self.size = size
+        self.structs = structs
+
+        # Get entries of aranges table in the form of ARangeEntry tuples
+        self.entries = self._get_entries()
+
+        # Sort entries by the beginning address
+        self.entries.sort(key=lambda entry: entry.begin_addr)
+
+        # Create list of keys (first addresses) for better searching
+        self.keys = [entry.begin_addr for entry in self.entries]
+
+
+    def cu_offset_at_addr(self, addr):
+        """ Return the .debug_info offset of the CU whose address range covers
+            addr, or None when no entry does (addr in a gap or below all keys).
+        """
+        idx = bisect_right(self.keys, addr) - 1  # last entry starting <= addr
+        hit = idx >= 0 and addr < self.keys[idx] + self.entries[idx].length
+        return self.entries[idx].info_offset if hit else None
+
+    #------ PRIVATE ------#
+    def _get_entries(self):
+        """ Parse every set in the section and return a list of ARangeEntry
+            tuples in section order; self.entries is not assigned here.
+        """
+        self.stream.seek(0)
+        entries = []
+        offset = 0
+
+        # one loop == one "set" == one CU
+        while offset < self.size:
+            aranges_header = struct_parse(self.structs.Dwarf_aranges_header,
+                self.stream, offset)
+            addr_size = self._get_addr_size_struct(aranges_header["address_size"])
+
+            # Segmentation exists in executable
+            if aranges_header["segment_size"] != 0:
+                raise NotImplementedError("Segmentation not implemented")
+
+            # pad to the next multiple of the tuple size (integer math, exact)
+            tuple_size = aranges_header["address_size"] * 2
+            fp = self.stream.tell()
+            seek_to = (fp + tuple_size - 1) // tuple_size * tuple_size
+            self.stream.seek(seek_to)
+
+            # entries in this set/CU, terminated by an all-zero tuple
+            addr = struct_parse(addr_size('addr'), self.stream)
+            length = struct_parse(addr_size('length'), self.stream)
+            while addr != 0 or length != 0:
+                entries.append(
+                    ARangeEntry(begin_addr=addr,
+                        length=length,
+                        info_offset=aranges_header["debug_info_offset"],
+                        unit_length=aranges_header["unit_length"],
+                        version=aranges_header["version"],
+                        address_size=aranges_header["address_size"],
+                        segment_size=aranges_header["segment_size"]))
+                addr = struct_parse(addr_size('addr'), self.stream)
+                length = struct_parse(addr_size('length'), self.stream)
+
+            # the next set starts unit_length bytes past the initial-length field
+            offset = (offset
+                + aranges_header["unit_length"]
+                + self.structs.initial_length_field_size())
+
+        return entries
+
+    def _get_addr_size_struct(self, addr_header_value):
+        """ Given this set's header value (int) for the address size, get the
+            Construct representation of that size; ValueError if not 4 or 8.
+        """
+        if addr_header_value == 4:
+            return self.structs.Dwarf_uint32
+        if addr_header_value == 8:
+            return self.structs.Dwarf_uint64
+        raise ValueError("Unsupported address size: %r" % addr_header_value)
diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py
index 5a5c41a..330a238 100644
--- a/elftools/dwarf/dwarfinfo.py
+++ b/elftools/dwarf/dwarfinfo.py
@@ -18,6 +18,7 @@ from .lineprogram import LineProgram
 from .callframe import CallFrameInfo
 from .locationlists import LocationLists
 from .ranges import RangeLists
+from .aranges import ARanges
 
 
 # Describes a debug section
@@ -57,6 +58,7 @@ class DWARFInfo(object):
     def __init__(self,
             config,
             debug_info_sec,
+            debug_aranges_sec,
             debug_abbrev_sec,
             debug_frame_sec,
             eh_frame_sec,
@@ -74,6 +76,7 @@ class DWARFInfo(object):
         """
         self.config = config
         self.debug_info_sec = debug_info_sec
+        self.debug_aranges_sec = debug_aranges_sec
         self.debug_abbrev_sec = debug_abbrev_sec
         self.debug_frame_sec = debug_frame_sec
         self.eh_frame_sec = eh_frame_sec
@@ -168,6 +171,17 @@ class DWARFInfo(object):
             base_structs=self.structs)
         return cfi.get_entries()
 
+    def get_aranges(self):
+        """ Build an ARanges object for the .debug_aranges section of the
+            DWARF data. Returns None if the section doesn't exist.
+        """
+        aranges_sec = self.debug_aranges_sec
+        if not aranges_sec:
+            return None
+        return ARanges(aranges_sec.stream,
+            aranges_sec.size,
+            self.structs)
+
     def location_lists(self):
         """ Get a LocationLists object representing the .debug_loc section of
             the DWARF data, or None if this section doesn't exist.
diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py
index f90a80a..e25dc84 100644
--- a/elftools/dwarf/structs.py
+++ b/elftools/dwarf/structs.py
@@ -132,6 +132,7 @@ class DWARFStructs(object):
         self._create_dw_form()
         self._create_lineprog_header()
         self._create_callframe_entry_headers()
+        self._create_aranges_header()
 
     def _create_initial_length(self):
         def _InitialLength(name):
@@ -210,6 +211,15 @@ class DWARFStructs(object):
             DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),
         )
 
+    def _create_aranges_header(self):
+        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
+            self.Dwarf_initial_length('unit_length'),
+            self.Dwarf_uint16('version'),
+            self.Dwarf_offset('debug_info_offset'), # a little tbd
+            self.Dwarf_uint8('address_size'),
+            self.Dwarf_uint8('segment_size')
+            )
+
     def _create_lineprog_header(self):
         # A file entry is terminated by a NULL byte, so we don't want to parse
         # past it. Therefore an If is used.
diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py
index 2bc2651..d198749 100644
--- a/elftools/elf/elffile.py
+++ b/elftools/elf/elffile.py
@@ -151,7 +151,7 @@ class ELFFile(object):
         # Sections that aren't found will be passed as None to DWARFInfo.
         #
 
-        section_names = ('.debug_info', '.debug_abbrev', '.debug_str',
+        section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev', '.debug_str',
                          '.debug_line', '.debug_frame',
                          '.debug_loc', '.debug_ranges')
 
@@ -159,7 +159,7 @@ class ELFFile(object):
         if compressed:
             section_names = tuple(map(lambda x: '.z' + x[1:], section_names))
 
-        debug_info_sec_name, debug_abbrev_sec_name, debug_str_sec_name, \
+        debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name, debug_str_sec_name, \
             debug_line_sec_name, debug_frame_sec_name, debug_loc_sec_name, \
             debug_ranges_sec_name = section_names
 
@@ -182,6 +182,7 @@ class ELFFile(object):
                     default_address_size=self.elfclass // 8,
                     machine_arch=self.get_machine_arch()),
                 debug_info_sec=debug_sections[debug_info_sec_name],
+                debug_aranges_sec=debug_sections[debug_aranges_sec_name],
                 debug_abbrev_sec=debug_sections[debug_abbrev_sec_name],
                 debug_frame_sec=debug_sections[debug_frame_sec_name],
                 # TODO(eliben): reading of eh_frame is not hooked up yet
diff --git a/scripts/readelf.py b/scripts/readelf.py
index 909faff..60c9c3e 100755
--- a/scripts/readelf.py
+++ b/scripts/readelf.py
@@ -662,6 +662,8 @@ class ReadElf(object):
             self._dump_debug_frames()
         elif dump_what == 'frames-interp':
             self._dump_debug_frames_interp()
+        elif dump_what == 'aranges':
+            self._dump_debug_aranges()
         else:
             self._emitline('debug dump not yet supported for "%s"' % dump_what)
 
@@ -979,6 +981,44 @@ class ReadElf(object):
             self._emit(describe_CFI_instructions(entry))
         self._emitline()
 
+    def _dump_debug_aranges(self):
+        """ Dump the aranges table, one header block per set, in section order
+        """
+        aranges_table = self._dwarfinfo.get_aranges()
+        if aranges_table is None:
+            return
+        # Seems redundant, but system readelf prints entries in section order,
+        # so re-parse instead of using the address-sorted aranges_table.entries.
+        unordered_entries = aranges_table._get_entries()
+        if not unordered_entries:
+            self._emitline()
+            self._emitline("Section '.debug_aranges' has no debugging data.")
+            return
+
+        self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_aranges_sec.name)
+        self._emitline()
+        prev_offset = None
+        for entry in unordered_entries:
+            if prev_offset != entry.info_offset:
+                if prev_offset is not None:  # close the previous set first
+                    self._emitline('    %s %s' % (
+                        self._format_hex(0, fullhex=True, lead0x=False),
+                        self._format_hex(0, fullhex=True, lead0x=False)))
+                self._emitline('  Length:                   %d' % (entry.unit_length))
+                self._emitline('  Version:                  %d' % (entry.version))
+                self._emitline('  Offset into .debug_info:  0x%x' % (entry.info_offset))
+                self._emitline('  Pointer Size:             %d' % (entry.address_size))
+                self._emitline('  Segment Size:             %d' % (entry.segment_size))
+                self._emitline()
+                self._emitline('    Address            Length')
+            self._emitline('    %s %s' % (
+                self._format_hex(entry.begin_addr, fullhex=True, lead0x=False),
+                self._format_hex(entry.length, fullhex=True, lead0x=False)))
+            prev_offset = entry.info_offset
+        self._emitline('    %s %s' % (
+                self._format_hex(0, fullhex=True, lead0x=False),
+                self._format_hex(0, fullhex=True, lead0x=False)))
+
     def _dump_debug_frames_interp(self):
         """ Dump the interpreted (decoded) frame information from .debug_frame
         """
diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py
index 00d9168..f8990e1 100755
--- a/test/run_readelf_tests.py
+++ b/test/run_readelf_tests.py
@@ -7,7 +7,7 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
-import os, sys
+import os, sys, platform
 import re
 from difflib import SequenceMatcher
 from optparse import OptionParser
@@ -26,9 +26,12 @@ testlog.addHandler(logging.StreamHandler(sys.stdout))
 # Set the path for calling readelf. We carry our own version of readelf around,
 # because binutils tend to change its output even between daily builds of the
 # same minor release and keeping track is a headache.
-READELF_PATH = 'test/external_tools/readelf'
-if not os.path.exists(READELF_PATH):
-    READELF_PATH = 'readelf'
+if platform.system() == "Darwin": # MacOS
+    READELF_PATH = 'greadelf'
+else:
+    READELF_PATH = 'test/external_tools/readelf'
+    if not os.path.exists(READELF_PATH):
+        READELF_PATH = 'readelf'
 
 def discover_testfiles(rootdir):
     """ Discover test files in the given directory. Yield them one by one.
@@ -48,7 +51,8 @@ def run_test_on_file(filename, verbose=False):
     for option in [
             '-e', '-d', '-s', '-n', '-r', '-x.text', '-p.shstrtab', '-V',
             '--debug-dump=info', '--debug-dump=decodedline',
-            '--debug-dump=frames', '--debug-dump=frames-interp']:
+            '--debug-dump=frames', '--debug-dump=frames-interp',
+            '--debug-dump=aranges']:
         if verbose: testlog.info("..option='%s'" % option)
         # stdouts will be a 2-element list: output of readelf and output
         # of scripts/readelf.py
-- 
2.30.2