renamed 'location expression' to the more general 'dwarf expression'
[pyelftools.git] / elftools / dwarf / dwarfinfo.py
index 7a4bbc03b324b613ce3df5a02d22fb513a2bb306..d805d555e49ae4d22ed59e8d31000e51eff77cfe 100644 (file)
@@ -8,17 +8,24 @@
 #-------------------------------------------------------------------------------
 from collections import namedtuple
 
-from ..construct import CString
 from ..common.exceptions import DWARFError
-from ..common.utils import struct_parse, dwarf_assert
+from ..common.utils import (struct_parse, dwarf_assert,
+                            parse_cstring_from_stream)
 from .structs import DWARFStructs
 from .compileunit import CompileUnit
 from .abbrevtable import AbbrevTable
+from .lineprogram import LineProgram
 
 
-# Describes a debug section in a stream: offset and size
+# Describes a debug section. Fields:
+#
+# stream: a stream object containing the data of this section
+# name: section name in the container file
+# global_offset: the global offset of the section in its container file
+# size: the size of the section's data, in bytes
 #
-DebugSectionLocator = namedtuple('DebugSectionLocator', 'offset size')
+DebugSectionDescriptor = namedtuple('DebugSectionDescriptor', 
+        'stream name global_offset size')
 
 
 class DWARFInfo(object):
@@ -26,30 +33,28 @@ class DWARFInfo(object):
         various parts of the debug infromation.
     """
     def __init__(self,
-            stream,
-            little_endian,
-            debug_info_loc,
-            debug_abbrev_loc,
-            debug_str_loc,
-            debug_line_loc):
+            elffile,
+            debug_info_sec,
+            debug_abbrev_sec,
+            debug_str_sec,
+            debug_line_sec):
         """ stream: 
                 A stream (file-like object) that contains debug sections
             
-            little_endian:
-                Section contents are in little-endian data format
-            
-            debug_*_loc:
-                DebugSectionLocator for this section, specifying where it can
-                be found in the stream
+            elffile:
+                ELFFile object; its little_endian attribute sets the byte order
+
+            debug_*_sec:
+                DebugSectionDescriptor for the corresponding debug section
         """
-        self.stream = stream
-        self.debug_info_loc = debug_info_loc
-        self.debug_abbrev_loc = debug_abbrev_loc
-        self.debug_str_loc = debug_str_loc
-        self.debug_line_loc = debug_line_loc
-        
-        self.little_endian = little_endian
+        self.elffile = elffile
+        self.debug_info_sec = debug_info_sec
+        self.debug_abbrev_sec = debug_abbrev_sec
+        self.debug_str_sec = debug_str_sec
+        self.debug_line_sec = debug_line_sec
         
+        self.little_endian = self.elffile.little_endian
+
         # This is the DWARFStructs the context uses, so it doesn't depend on 
         # DWARF format and address_size (these are determined per CU) - set them
         # to default values.
@@ -58,33 +63,19 @@ class DWARFInfo(object):
             dwarf_format=32,
             address_size=4)
         
-        # Populate the list with CUs found in debug_info. For each CU only its
-        # header is parsed immediately (the abbrev table isn't loaded before
-        # it's being referenced by one of the CU's DIEs). 
-        # Since there usually aren't many CUs in a single object, this
-        # shouldn't present a performance problem.
-        #
-        self._CU = self._parse_CUs()
+        # A list of CUs. Populated lazily, on the first call to iter_CUs().
+        self._CUs = None
         
         # Cache for abbrev tables: a dict keyed by offset
         self._abbrevtable_cache = {}
     
-    def num_CUs(self):
-        """ Number of compile units in the debug info
-        """
-        return len(self._CU)
-    
-    def get_CU(self, n):
-        """ Get the compile unit (CompileUnit object) at index #n
-        """
-        return self._CU[n]
-    
     def iter_CUs(self):
         """ Yield all the compile units (CompileUnit objects) in the debug info
         """
-        for i in range(self.num_CUs()):
-            yield self.get_CU(i)
-    
+        if self._CUs is None:
+            self._CUs = self._parse_CUs()
+        return iter(self._CUs)
+
     def get_abbrev_table(self, offset):
         """ Get an AbbrevTable from the given offset in the debug_abbrev
             section.
@@ -98,43 +89,43 @@ class DWARFInfo(object):
             offset will return the same object).
         """
         dwarf_assert(
-            offset < self.debug_abbrev_loc.size,
+            offset < self.debug_abbrev_sec.size,
             "Offset '0x%x' to abbrev table out of section bounds" % offset)
         if offset not in self._abbrevtable_cache:
             self._abbrevtable_cache[offset] = AbbrevTable(
                 structs=self.structs,
-                stream=self.stream,
-                offset=offset + self.debug_abbrev_loc.offset)
+                stream=self.debug_abbrev_sec.stream,
+                offset=offset)
         return self._abbrevtable_cache[offset]
     
-    def info_offset2absolute(self, offset):
-        """ Given an offset into the debug_info section, translate it to an 
-            absolute offset into the stream. Raise an exception if the offset
-            exceeds the section bounds.
-        """
-        dwarf_assert(
-            offset < self.debug_info_loc.size,
-            "Offset '0x%x' to debug_info out of section bounds" % offset)
-        return offset + self.debug_info_loc.offset
-    
     def get_string_from_table(self, offset):
         """ Obtain a string from the string table section, given an offset 
             relative to the section.
         """
-        return struct_parse(
-            CString(''),
-            self.stream,
-            stream_pos=self.debug_str_loc.offset + offset)
+        return parse_cstring_from_stream(self.debug_str_sec.stream, offset)
     
+    def line_program_for_CU(self, CU):
+        """ Given a CU object, fetch the line program it points to from the
+            .debug_line section.
+            If the CU doesn't point to a line program, return None.
+        """
+        # The line program is pointed to by the DW_AT_stmt_list attribute of
+        # the top DIE of a CU.
+        top_DIE = CU.get_top_DIE()
+        if 'DW_AT_stmt_list' in top_DIE.attributes:
+            return self._parse_line_program_at_offset(
+                    top_DIE.attributes['DW_AT_stmt_list'].value, CU.structs)
+        else:
+            return None
+        
     #------ PRIVATE ------#
     
     def _parse_CUs(self):
         """ Parse CU entries from debug_info.
         """
-        offset = self.debug_info_loc.offset
-        section_boundary = self.debug_info_loc.offset + self.debug_info_loc.size
+        offset = 0
         CUlist = []
-        while offset < section_boundary:
+        while offset < self.debug_info_sec.size:
             # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v3
             # states that the first 32-bit word of the CU header determines 
             # whether the CU is represented with 32-bit or 64-bit DWARF format.
@@ -144,7 +135,7 @@ class DWARFInfo(object):
             # instance suitable for this CU and use it to parse the rest.
             #
             initial_length = struct_parse(
-                self.structs.Dwarf_uint32(''), self.stream, offset)
+                self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset)
             dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32
             
             # At this point we still haven't read the whole header, so we don't
@@ -159,14 +150,14 @@ class DWARFInfo(object):
                 address_size=4)
             
             cu_header = struct_parse(
-                cu_structs.Dwarf_CU_header, self.stream, offset)
+                cu_structs.Dwarf_CU_header, self.debug_info_sec.stream, offset)
             if cu_header['address_size'] == 8:
                 cu_structs = DWARFStructs(
                     little_endian=self.little_endian,
                     dwarf_format=dwarf_format,
                      address_size=8)
             
-            cu_die_offset = self.stream.tell()
+            cu_die_offset = self.debug_info_sec.stream.tell()
             dwarf_assert(
                 self._is_supported_version(cu_header['version']),
                 "Expected supported DWARF version. Got '%s'" % cu_header['version'])
@@ -174,13 +165,14 @@ class DWARFInfo(object):
                 header=cu_header,
                 dwarfinfo=self,
                 structs=cu_structs,
+                cu_offset=offset,
                 cu_die_offset=cu_die_offset))
             # Compute the offset of the next CU in the section. The unit_length
             # field of the CU header contains its size not including the length
             # field itself.
             offset = (  offset + 
                         cu_header['unit_length'] + 
-                        cu_structs.initial_lenght_field_size())
+                        cu_structs.initial_length_field_size())
         return CUlist
         
     def _is_supported_version(self, version):
@@ -188,3 +180,24 @@ class DWARFInfo(object):
         """
         return 2 <= version <= 3
 
+    def _parse_line_program_at_offset(self, debug_line_offset, structs):
+        """ Given an offset to the .debug_line section, parse the line program
+            starting at this offset in the section and return it.
+            structs is the DWARFStructs object used to do this parsing.
+        """
+        lineprog_header = struct_parse(
+            structs.Dwarf_lineprog_header,
+            self.debug_line_sec.stream,
+            debug_line_offset)
+
+        # Calculate the offset to the next line program (see DWARF 6.2.4)
+        end_offset = (  debug_line_offset + lineprog_header['unit_length'] +
+                        structs.initial_length_field_size())
+
+        return LineProgram(
+            header=lineprog_header,
+            stream=self.debug_line_sec.stream,
+            structs=structs,
+            program_start_offset=self.debug_line_sec.stream.tell(),
+            program_end_offset=end_offset)
+