From 5fa0f0b67a3d335d37f4fc9c13d35ecab553cb56 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Sat, 3 Dec 2011 15:50:42 +0200 Subject: [PATCH] changed the method to obtain line programs - now per CU --- elftools/dwarf/dwarfinfo.py | 87 +++++++++++++---------------------- elftools/dwarf/lineprogram.py | 86 +++++++++++++++++++++++++++------- elftools/dwarf/structs.py | 7 ++- z.py | 6 +-- 4 files changed, 110 insertions(+), 76 deletions(-) diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 90ba52a..8ebb47d 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -68,10 +68,6 @@ class DWARFInfo(object): # Cache for abbrev tables: a dict keyed by offset self._abbrevtable_cache = {} - - # A list of parsed line programs. Populated lazily when the line - # programs are actually requested - self._lineprograms = None def iter_CUs(self): """ Yield all the compile units (CompileUnit objects) in the debug info @@ -80,13 +76,6 @@ class DWARFInfo(object): self._CUs = self._parse_CUs() return iter(self._CUs) - def iter_line_programs(self): - """ Yield all the line programs (LineProgram ojects) in the debug info - """ - if self._lineprograms is None: - self._lineprograms = self._parse_line_programs() - return iter(self._lineprograms) - def get_abbrev_table(self, offset): """ Get an AbbrevTable from the given offset in the debug_abbrev section. @@ -115,6 +104,20 @@ class DWARFInfo(object): """ return parse_cstring_from_stream(self.debug_str_sec.stream, offset) + def line_program_for_CU(self, CU): + """ Given a CU object, fetch the line program it points to from the + .debug_line section. + If the CU doesn't point to a line program, return None. + """ + # The line program is pointed to by the DW_AT_stmt_list attribute of + # the top DIE of a CU. + top_DIE = CU.get_top_DIE() + if 'DW_AT_stmt_list' in top_DIE.attributes: + return self._parse_line_program_at_offset( + top_DIE.attributes['DW_AT_stmt_list'], CU.structs) + else: + return None + #------ PRIVATE ------# def _parse_CUs(self): @@ -177,48 +180,24 @@ class DWARFInfo(object): """ return 2 <= version <= 3 - def _parse_line_programs(self): - """ Parse line programs from debug_line + def _parse_line_program_at_offset(self, debug_line_offset, structs): + """ Given an offset to the .debug_line section, parse the line program + starting at this offset in the section and return it. + structs is the DWARFStructs object used to do this parsing. """ - offset = 0 - lineprograms = [] - while offset < self.debug_line_sec.size: - # Similarly to CU parsing, peek at the initial_length field of the - # header to figure out the DWARF format for it. - initial_length = struct_parse( - self.structs.Dwarf_uint32(''), - self.debug_line_sec.stream, - offset) - dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32 - - # Prepare the structs for this line program, based on its format - # and the default endianness. The address_size plays no role for - # line programs so we just give it a default value. - lineprog_structs = DWARFStructs( - little_endian=self.little_endian, - dwarf_format=dwarf_format, - address_size=4) - - # Now parse the header fully using up-to-date structs. After this, - # the section stream will point at the beginning of the program - # itself, right after the header. - lineprog_header = struct_parse( - lineprog_structs.Dwarf_lineprog_header, - self.debug_line_sec.stream, - offset) - - # Calculate the offset to the next line program (see DWARF 6.2.4) - end_offset = ( offset + lineprog_header['unit_length'] + - lineprog_structs.initial_length_field_size())) - - lineprograms.append(LineProgram( - header=lineprog_header, - dwarfinfo=self, - structs=lineprog_structs, - program_start_offset=self.debug_line_sec.stream.tell()), - program_end_offset=end_offset) - - offset = end_offset - - return lineprograms + lineprog_header = struct_parse( + structs.Dwarf_lineprog_header, + self.debug_line_sec.stream, + offset) + + # Calculate the offset to the next line program (see DWARF 6.2.4) + end_offset = ( offset + lineprog_header['unit_length'] + + lineprog_structs.initial_length_field_size()) + + return LineProgram( + header=lineprog_header, + dwarfinfo=self, + structs=lineprog_structs, + program_start_offset=self.debug_line_sec.stream.tell(), + program_end_offset=end_offset) diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py index ce92075..43d8f90 100644 --- a/elftools/dwarf/lineprogram.py +++ b/elftools/dwarf/lineprogram.py @@ -13,6 +13,8 @@ from .constants import * class LineState(object): """ Represents a line program state (or a "row" in the matrix describing debug location information for addresses). + The instance variables of this class are the "state machine registers" + described in section 6.2.2 of DWARFv3 """ def __init__(self, default_is_stmt): self.address = 0 @@ -37,7 +39,9 @@ class LineProgram(object): program_start_offset, program_end_offset): """ header: - The header of this line program + The header of this line program. Note: LineProgram may modify + its header by appending file entries if DW_LNE_define_file + instructions are encountered. dwarfinfo: The DWARFInfo context object which created this one @@ -77,6 +81,15 @@ class LineProgram(object): linetable = [] state = LineState(self.header['default_is_stmt']) + def add_state_to_table(): + # Used by instructions that have to add the current state to the + # line table. After adding, some state registers have to be + # cleared. + linetable.append(state) + state.basic_block = False + state.prologue_end = False + state.epilogue_begin = False + offset = self.program_start_offset while offset < self.program_end_offset: opcode = struct_parse( @@ -87,30 +100,69 @@ class LineProgram(object): # As an exercise in avoiding premature optimization, if...elif # chains are used here for standard and extended opcodes instead # of dispatch tables. This keeps the code much cleaner. Besides, - # the majority of instructions are special opcodes anyway. - if opcode == 0: + # the majority of instructions in a typical program are special + # opcodes anyway. + if opcode >= self.header['opcode_base']: + # Special opcode (follow the recipe in 6.2.5.1) + adjusted_opcode = opcode - self['opcode_base'] + state.address += ((adjusted_opcode / self['line_range']) * + self['minimum_instruction_length']) + self.line += (self['line_base'] + + adjusted_opcode % self['line_range']) + add_state_to_table() + elif opcode == 0: # Extended opcode: start with a zero byte, followed by # instruction size and the instruction itself. - pass - elif opcode < self.header['opcode_base']: + inst_len = struct_parse(self.Dwarf_uleb128, self.stream) + ex_opcode = struct_parse(self.Dwarf_uint8, self.stream) + + if ex_opcode == DW_LNE_end_sequence: + state.end_sequence = True + add_state_to_table(state) + state = LineState() # reset state + elif ex_opcode == DW_LNE_set_address: + operand = struct_parse(self.Dwarf_target_addr, self.stream) + state.address = operand + elif ex_opcode == DW_LNE_define_file: + operand = struct_parse(self.Dwarf_lineprog_file_entry, + self.stream) + self['file_entry'].append(operand) + else: # 0 < opcode < opcode_base # Standard opcode if opcode == DW_LNS_copy: - linetable.append(state) - state.basic_block = False - state.prologue_end = False - state.epilogue_begin = False + add_state_to_table() elif opcode == DW_LNS_advance_pc: operand = struct_parse(self.Dwarf_uleb128, self.stream) state.address += ( operand * self.header['minimum_instruction_length']) - elif opcode = DW_LNS_advance_line: + elif opcode == DW_LNS_advance_line: operand = struct_parse(self.Dwarf_sleb128, self.stream) state.line += operand - # ZZZ! go on now... - else: - # Special opcode - pass - - def _handle_LNS_copy(self, opcode, state, linetable): - pass + elif opcode == DW_LNS_set_file: + operand = struct_parse(self.Dwarf_sleb128, self.stream) + state.file = operand + elif opcode == DW_LNS_set_column: + operand = struct_parse(self.Dwarf_uleb128, self.stream) + state.column = operand + elif opcode == DW_LNS_negate_stmt: + state.is_stmt = not state.is_stmt + elif opcode == DW_LNS_set_basic_block: + state.basic_block = True + elif opcode == DW_LNS_const_add_pc: + adjusted_opcode = 255 - self['opcode_base'] + state.address += ((adjusted_opcode / self['line_range']) * + self['minimum_instruction_length']) + elif opcode == DW_LNS_fixed_advance_pc: + operand = struct_parse(self.Dwarf_uint16, self.stream) + state.address += operand + elif opcode == DW_LNS_set_prologue_end: + state.prologue_end = True + elif opcode == DW_LNS_set_epilogue_begin: + state.epilogue_begin = True + elif opcode == DW_LNS_set_isa: + operand = struct_parse(self.Dwarf_uleb128, self.stream) + state.isa = operand + else: + dwarf_assert(False, 'Invalid standard line program opcode: %s' % ( + opcode,)) diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 6d71657..ebf34c6 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -59,6 +59,9 @@ class DWARFStructs(object): Dwarf_lineprog_header (+): Line program header + + Dwarf_lineprog_file_entry (+): + A single file entry in a line program header or instruction See also the documentation of public methods. """ @@ -188,7 +191,7 @@ class DWARFStructs(object): def _create_lineprog_header(self): # A file entry is terminated by a NULL byte, so we don't want to parse # past it. Therefore an If is used. - file_entry = Struct('file_entry', + self.Dwarf_lineprog_file_entry = Struct('file_entry', CString('name'), If(lambda ctx: len(ctx.name) != 0, Embed(Struct('', @@ -212,7 +215,7 @@ class DWARFStructs(object): CString('include_directory')), RepeatUntilExcluding( lambda obj, ctx: len(obj.name) == 0, - file_entry), + self.Dwarf_lineprog_file_entry), ) def _make_block_struct(self, length_field): diff --git a/z.py b/z.py index 4bf3709..f603948 100644 --- a/z.py +++ b/z.py @@ -23,7 +23,7 @@ print '===> %s sections!' % efile.num_sections() dwarfinfo = efile.get_dwarf_info() -for lp in dwarfinfo.iter_line_programs(): - print lp - print lp.header +#for lp in dwarfinfo.iter_line_programs(): + #print lp + #print lp.header -- 2.30.2