From 1eae573a5d2849ec0f592dd6429dc05e8966f067 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Mon, 26 Dec 2011 06:19:50 +0200 Subject: [PATCH] lazy parsing of CUs --- elftools/dwarf/dwarfinfo.py | 99 ++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index ff99f17..c530b43 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -80,18 +80,13 @@ class DWARFInfo(object): dwarf_format=32, address_size=self.config.default_address_size) - # A list of CUs. Populated lazily when they're actually requested. - self._CUs = None - # Cache for abbrev tables: a dict keyed by offset self._abbrevtable_cache = {} def iter_CUs(self): """ Yield all the compile units (CompileUnit objects) in the debug info """ - if self._CUs is None: - self._CUs = self._parse_CUs() - return iter(self._CUs) + return self._parse_CUs_iter() def get_abbrev_table(self, offset): """ Get an AbbrevTable from the given offset in the debug_abbrev @@ -151,60 +146,64 @@ class DWARFInfo(object): #------ PRIVATE ------# - def _parse_CUs(self): - """ Parse CU entries from debug_info. + def _parse_CUs_iter(self): + """ Parse CU entries from debug_info. Yield CUs in order of appearance. """ offset = 0 - CUlist = [] while offset < self.debug_info_sec.size: - # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v3 - # states that the first 32-bit word of the CU header determines - # whether the CU is represented with 32-bit or 64-bit DWARF format. - # - # So we peek at the first word in the CU header to determine its - # dwarf format. Based on it, we then create a new DWARFStructs - # instance suitable for this CU and use it to parse the rest. - # - initial_length = struct_parse( - self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset) - dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32 - - # At this point we still haven't read the whole header, so we don't - # know the address_size. Therefore, we're going to create structs - # with a default address_size=4. If, after parsing the header, we - # find out address_size is actually 8, we just create a new structs - # object for this CU. - # + cu = self._parse_CU_at_offset(offset) + # Compute the offset of the next CU in the section. The unit_length + # field of the CU header contains its size not including the length + # field itself. + offset = ( offset + + cu['unit_length'] + + cu.structs.initial_length_field_size()) + yield cu + + def _parse_CU_at_offset(self, offset): + """ Parse and return a CU at the given offset in the debug_info stream. + """ + # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v3 + # states that the first 32-bit word of the CU header determines + # whether the CU is represented with 32-bit or 64-bit DWARF format. + # + # So we peek at the first word in the CU header to determine its + # dwarf format. Based on it, we then create a new DWARFStructs + # instance suitable for this CU and use it to parse the rest. + # + initial_length = struct_parse( + self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset) + dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32 + + # At this point we still haven't read the whole header, so we don't + # know the address_size. Therefore, we're going to create structs + # with a default address_size=4. If, after parsing the header, we + # find out address_size is actually 8, we just create a new structs + # object for this CU. + # + cu_structs = DWARFStructs( + little_endian=self.config.little_endian, + dwarf_format=dwarf_format, + address_size=4) + + cu_header = struct_parse( + cu_structs.Dwarf_CU_header, self.debug_info_sec.stream, offset) + if cu_header['address_size'] == 8: cu_structs = DWARFStructs( little_endian=self.config.little_endian, dwarf_format=dwarf_format, - address_size=4) - - cu_header = struct_parse( - cu_structs.Dwarf_CU_header, self.debug_info_sec.stream, offset) - if cu_header['address_size'] == 8: - cu_structs = DWARFStructs( - little_endian=self.config.little_endian, - dwarf_format=dwarf_format, - address_size=8) - - cu_die_offset = self.debug_info_sec.stream.tell() - dwarf_assert( - self._is_supported_version(cu_header['version']), - "Expected supported DWARF version. Got '%s'" % cu_header['version']) - CUlist.append(CompileUnit( + address_size=8) + + cu_die_offset = self.debug_info_sec.stream.tell() + dwarf_assert( + self._is_supported_version(cu_header['version']), + "Expected supported DWARF version. Got '%s'" % cu_header['version']) + return CompileUnit( header=cu_header, dwarfinfo=self, structs=cu_structs, cu_offset=offset, - cu_die_offset=cu_die_offset)) - # Compute the offset of the next CU in the section. The unit_length - # field of the CU header contains its size not including the length - # field itself. - offset = ( offset + - cu_header['unit_length'] + - cu_structs.initial_length_field_size()) - return CUlist + cu_die_offset=cu_die_offset) def _is_supported_version(self, version): """ DWARF version supported by this parser -- 2.30.2