Support ZLIB compressed debug sections
author ttsugrii <ttsugrii@fb.com>
Tue, 24 May 2016 01:45:32 +0000 (18:45 -0700)
committer Taras Tsugrii <ttsugrii@fb.com>
Mon, 20 Jun 2016 16:32:09 +0000 (09:32 -0700)
objcopy --compress-debug-sections uses ZLIB compression to reduce the
size of debug sections, which can sometimes be larger than the binary
itself. This change makes pyelftools recognize compressed (.zdebug_*)
debug sections when checking for DWARF data, and decompress them when
loading it.

The readelf tool supports other types of compressed sections
(https://github.com/facebook/binutils/blob/master/binutils/readelf.c#L12038),
but supporting them is outside the scope of this change.

Test plan:
  $ ./test/run_all_unittests.py

Signed-off-by: Stanislas P1kachu Lejay <p1kachu@lse.epita.fr>
elftools/dwarf/dwarfinfo.py
elftools/elf/elffile.py
scripts/readelf.py
test/run_readelf_tests.py
test/testfiles_for_readelf/exe_compressed64.elf [new file with mode: 0644]

index 1995fc88f772efe9a169e058e6e3fc1712c6242a..5a5c41ad263acc6baf6632fa5070646d57426fae 100644 (file)
@@ -191,6 +191,9 @@ class DWARFInfo(object):
     def _parse_CUs_iter(self):
         """ Parse CU entries from debug_info. Yield CUs in order of appearance.
         """
+        if self.debug_info_sec is None:
+            return
+
         offset = 0
         while offset < self.debug_info_sec.size:
             cu = self._parse_CU_at_offset(offset)
index f9171dfdedcf5c0eb5c34d7420888beb2516cc73..0517b578023d7a75546b0588fcb8639b71596d84 100644 (file)
@@ -6,10 +6,13 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
+import io
+import resource
+import struct
+import zlib
 from ..common.py3compat import BytesIO
 from ..common.exceptions import ELFError
 from ..common.utils import struct_parse, elf_assert
-from ..construct import ConstructError
 from .structs import ELFStructs
 from .sections import (
         Section, StringTableSection, SymbolTableSection,
@@ -122,10 +125,11 @@ class ELFFile(object):
 
     def has_dwarf_info(self):
         """ Check whether this file appears to have debugging information.
-            We assume that if it has the debug_info section, it has all theother
-            required sections as well.
+            We assume that if it has the .debug_info or .zdebug_info section, it
+            has all the other required sections as well.
         """
-        return bool(self.get_section_by_name('.debug_info'))
+        return bool(self.get_section_by_name('.debug_info')) or \
+            bool(self.get_section_by_name('.zdebug_info'))
 
     def get_dwarf_info(self, relocate_dwarf_sections=True):
         """ Return a DWARFInfo object representing the debugging information in
@@ -138,32 +142,47 @@ class ELFFile(object):
         # present.
         # Sections that aren't found will be passed as None to DWARFInfo.
         #
+
+        section_names = ('.debug_info', '.debug_abbrev', '.debug_str',
+                         '.debug_line', '.debug_frame',
+                         '.debug_loc', '.debug_ranges')
+
+        compressed = bool(self.get_section_by_name('.zdebug_info'))
+        if compressed:
+            section_names = tuple(map(lambda x: '.z' + x[1:], section_names))
+
+        debug_info_sec_name, debug_abbrev_sec_name, debug_str_sec_name, \
+            debug_line_sec_name, debug_frame_sec_name, debug_loc_sec_name, \
+            debug_ranges_sec_name = section_names
+
         debug_sections = {}
-        for secname in ('.debug_info', '.debug_abbrev', '.debug_str',
-                        '.debug_line', '.debug_frame',
-                        '.debug_loc', '.debug_ranges'):
+        for secname in section_names:
             section = self.get_section_by_name(secname)
             if section is None:
                 debug_sections[secname] = None
             else:
-                debug_sections[secname] = self._read_dwarf_section(
+                dwarf_section = self._read_dwarf_section(
                     section,
                     relocate_dwarf_sections)
+                if compressed:
+                    dwarf_section = self._decompress_dwarf_section(dwarf_section)
+                debug_sections[secname] = dwarf_section
 
         return DWARFInfo(
                 config=DwarfConfig(
                     little_endian=self.little_endian,
                     default_address_size=self.elfclass // 8,
                     machine_arch=self.get_machine_arch()),
-                debug_info_sec=debug_sections['.debug_info'],
-                debug_abbrev_sec=debug_sections['.debug_abbrev'],
-                debug_frame_sec=debug_sections['.debug_frame'],
+                debug_info_sec=debug_sections[debug_info_sec_name],
+                debug_abbrev_sec=debug_sections[debug_abbrev_sec_name],
+                debug_frame_sec=debug_sections[debug_frame_sec_name],
                 # TODO(eliben): reading of eh_frame is not hooked up yet
                 eh_frame_sec=None,
-                debug_str_sec=debug_sections['.debug_str'],
-                debug_loc_sec=debug_sections['.debug_loc'],
-                debug_ranges_sec=debug_sections['.debug_ranges'],
-                debug_line_sec=debug_sections['.debug_line'])
+                debug_str_sec=debug_sections[debug_str_sec_name],
+                debug_loc_sec=debug_sections[debug_loc_sec_name],
+                debug_ranges_sec=debug_sections[debug_ranges_sec_name],
+                debug_line_sec=debug_sections[debug_line_sec_name])
+
 
     def get_machine_arch(self):
         """ Return the machine architecture, as detected from the ELF header.
@@ -376,3 +395,38 @@ class ELFFile(object):
                 name=section.name,
                 global_offset=section['sh_offset'],
                 size=section['sh_size'])
+
+    @staticmethod
+    def _decompress_dwarf_section(section):
+        """ Returns the uncompressed contents of the provided DWARF section.
+        """
+        # TODO: support other compression formats from readelf.c
+        assert section.size > 12, 'Unsupported compression format.'
+
+        section.stream.seek(0)
+        # According to readelf.c the content should contain "ZLIB"
+        # followed by the uncompressed section size - 8 bytes in
+        # big-endian order
+        compression_type = section.stream.read(4)
+        assert compression_type == b'ZLIB', \
+            'Invalid compression type: %r' % (compression_type)
+
+        uncompressed_size = struct.unpack('>Q', section.stream.read(8))[0]
+
+        decompressor = zlib.decompressobj()
+        uncompressed_stream = BytesIO()
+        while True:
+            chunk = section.stream.read(resource.getpagesize())
+            if not chunk:
+                break
+            uncompressed_stream.write(decompressor.decompress(chunk))
+        uncompressed_stream.write(decompressor.flush())
+
+        uncompressed_stream.seek(0, io.SEEK_END)
+        size = uncompressed_stream.tell()
+        assert uncompressed_size == size, \
+                'Wrong uncompressed size: expected %r, but got %r' % (
+                    uncompressed_size, size,
+                )
+
+        return section._replace(stream=uncompressed_stream, size=size)
index 43cb37b6dfd790cc9544b4dbe972eb5208556b7f..909faff9f84c2ba4277f84390b0c43c673ba34b7 100755 (executable)
@@ -835,7 +835,7 @@ class ReadElf(object):
     def _dump_debug_info(self):
         """ Dump the debugging info section.
         """
-        self._emitline('Contents of the .debug_info section:\n')
+        self._emitline('Contents of the %s section:\n' % self._dwarfinfo.debug_info_sec.name)
 
         # Offset of the .debug_info section in the stream
         section_offset = self._dwarfinfo.debug_info_sec.global_offset
@@ -888,7 +888,7 @@ class ReadElf(object):
         """ Dump the (decoded) line programs from .debug_line
             The programs are dumped in the order of the CUs they belong to.
         """
-        self._emitline('Decoded dump of debug contents of section .debug_line:\n')
+        self._emitline('Decoded dump of debug contents of section %s:\n' % self._dwarfinfo.debug_line_sec.name)
 
         for cu in self._dwarfinfo.iter_CUs():
             lineprogram = self._dwarfinfo.line_program_for_CU(cu)
@@ -951,7 +951,7 @@ class ReadElf(object):
         """
         if not self._dwarfinfo.has_CFI():
             return
-        self._emitline('Contents of the .debug_frame section:')
+        self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_frame_sec.name)
 
         for entry in self._dwarfinfo.CFI_entries():
             if isinstance(entry, CIE):
@@ -985,7 +985,7 @@ class ReadElf(object):
         if not self._dwarfinfo.has_CFI():
             return
 
-        self._emitline('Contents of the .debug_frame section:')
+        self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_frame_sec.name)
 
         for entry in self._dwarfinfo.CFI_entries():
             if isinstance(entry, CIE):
index 88869bedf75a65009056415f25ffe531887fbbce..00d91682b8995b2b71220429ac48d974ad7e7815 100755 (executable)
@@ -97,7 +97,8 @@ def compare_output(s1, s2):
         for line in lines:
             if 'of the .eh_frame section' in line:
                 filter_out = True
-            elif 'of the .debug_frame section' in line:
+            elif 'of the .debug_frame section' in line or \
+                'of the .zdebug_frame section' in line:
                 filter_out = False
             if not filter_out:
                 if not line.startswith('unknown: length'):
diff --git a/test/testfiles_for_readelf/exe_compressed64.elf b/test/testfiles_for_readelf/exe_compressed64.elf
new file mode 100644 (file)
index 0000000..2b50086
Binary files /dev/null and b/test/testfiles_for_readelf/exe_compressed64.elf differ