From bd3e73a193049f0bff58746607b629e01627c76e Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Sat, 25 May 2013 15:47:55 -0700 Subject: [PATCH] More DWARFv4 parsing goodness. Also added some hookups for parsing .eh_frame but this isn't enabled yet, because pyelftools can't yet successfully parse it (have to look at it deeper). I tweaked the build of the gcc48-simple sample to generate .debug_frame as well. --- elftools/dwarf/callframe.py | 22 ++++++---- elftools/dwarf/dwarfinfo.py | 22 ++++++++-- elftools/dwarf/structs.py | 38 +++++++++++++----- elftools/elf/elffile.py | 10 +++-- elftools/elf/relocation.py | 6 +++ test/testfiles_for_readelf/gcc48-simple.o | Bin 2968 -> 3312 bytes test/testfiles_for_readelf/gcc48-simple.src.c | 19 +++++++++ 7 files changed, 93 insertions(+), 24 deletions(-) create mode 100644 test/testfiles_for_readelf/gcc48-simple.src.c diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index 5b35af6..264adb8 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -26,13 +26,13 @@ class CallFrameInfo(object): Eventually, each entry gets its own structs based on the initial length field it starts with. The address_size, however, is taken from base_structs. This appears to be a limitation of the DWARFv3 - standard, fixed in v4 (where an address_size field exists for each - CFI. A discussion I had on dwarf-discuss confirms this. - Currently for base_structs I simply use the elfclass of the - containing file, but more sophisticated methods are used by - libdwarf and others, such as guessing which CU contains which FDEs - (based on their address ranges) and taking the address_size from - those CUs. + standard, fixed in v4. + A discussion I had on dwarf-discuss confirms this. 
+ So for DWARFv4 we'll take the address size from the CIE header, + but for earlier versions will use the elfclass of the containing + file; more sophisticated methods are used by libdwarf and others, + such as guessing which CU contains which FDEs (based on their + address ranges) and taking the address_size from those CUs. """ def __init__(self, stream, size, base_structs): self.stream = stream @@ -99,6 +99,14 @@ class CallFrameInfo(object): header = struct_parse( header_struct, self.stream, offset) + # If this is DWARF version 4 or later, we can have a more precise + # address size, read from the CIE header. + if entry_structs.dwarf_version >= 4: + entry_structs = DWARFStructs( + little_endian=entry_structs.little_endian, + dwarf_format=entry_structs.dwarf_format, + address_size=header.address_size) + # For convenience, compute the end offset for this entry end_offset = ( offset + header.length + diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 2c6cc89..e5c0e71 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -59,6 +59,7 @@ class DWARFInfo(object): debug_info_sec, debug_abbrev_sec, debug_frame_sec, + eh_frame_sec, debug_str_sec, debug_loc_sec, debug_ranges_sec, @@ -75,6 +76,7 @@ class DWARFInfo(object): self.debug_info_sec = debug_info_sec self.debug_abbrev_sec = debug_abbrev_sec self.debug_frame_sec = debug_frame_sec + self.eh_frame_sec = eh_frame_sec self.debug_str_sec = debug_str_sec self.debug_loc_sec = debug_loc_sec self.debug_ranges_sec = debug_ranges_sec @@ -139,12 +141,12 @@ class DWARFInfo(object): return None def has_CFI(self): - """ Does this dwarf info has a CFI section? + """ Does this dwarf info have a dwarf_frame CFI section? """ return self.debug_frame_sec is not None def CFI_entries(self): - """ Get a list of CFI entries from the .debug_frame section. + """ Get a list of dwarf_frame CFI entries from the .debug_frame section. 
+ """ cfi = CallFrameInfo( stream=self.debug_frame_sec.stream, @@ -152,6 +154,20 @@ class DWARFInfo(object): base_structs=self.structs) return cfi.get_entries() + def has_EH_CFI(self): + """ Does this dwarf info have an eh_frame CFI section? + """ + return self.eh_frame_sec is not None + + def EH_CFI_entries(self): + """ Get a list of eh_frame CFI entries from the .eh_frame section. + """ + cfi = CallFrameInfo( + stream=self.eh_frame_sec.stream, + size=self.eh_frame_sec.size, + base_structs=self.structs) + return cfi.get_entries() + def location_lists(self): """ Get a LocationLists object representing the .debug_loc section of the DWARF data, or None if this section doesn't exist. @@ -212,7 +228,7 @@ class DWARFInfo(object): cu_structs = DWARFStructs( little_endian=self.config.little_endian, dwarf_format=dwarf_format, - address_size=8) + address_size=8) cu_die_offset = self.debug_info_sec.stream.tell() dwarf_assert( diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index fb4074a..2cc281f 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -71,8 +71,12 @@ class DWARFStructs(object): See also the documentation of public methods. 
""" - def __init__(self, little_endian, dwarf_format, address_size): - """ little_endian: + def __init__(self, + little_endian, dwarf_format, address_size, dwarf_version=2): + """ dwarf_version: + Numeric DWARF version + + little_endian: True if the file is little endian, False if big dwarf_format: @@ -87,6 +91,7 @@ class DWARFStructs(object): self.little_endian = little_endian self.dwarf_format = dwarf_format self.address_size = address_size + self.dwarf_version = dwarf_version self._create_structs() def initial_length_field_size(self): @@ -232,14 +237,27 @@ class DWARFStructs(object): ) def _create_callframe_entry_headers(self): - self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', - self.Dwarf_initial_length('length'), - self.Dwarf_offset('CIE_id'), - self.Dwarf_uint8('version'), - CString('augmentation'), - self.Dwarf_uleb128('code_alignment_factor'), - self.Dwarf_sleb128('data_alignment_factor'), - self.Dwarf_uleb128('return_address_register')) + # The CIE header was modified in DWARFv4. 
+ if self.dwarf_version == 4: + self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', + self.Dwarf_initial_length('length'), + self.Dwarf_offset('CIE_id'), + self.Dwarf_uint8('version'), + CString('augmentation'), + self.Dwarf_uint8('address_size'), + self.Dwarf_uint8('segment_size'), + self.Dwarf_uleb128('code_alignment_factor'), + self.Dwarf_sleb128('data_alignment_factor'), + self.Dwarf_uleb128('return_address_register')) + else: + self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', + self.Dwarf_initial_length('length'), + self.Dwarf_offset('CIE_id'), + self.Dwarf_uint8('version'), + CString('augmentation'), + self.Dwarf_uleb128('code_alignment_factor'), + self.Dwarf_sleb128('data_alignment_factor'), + self.Dwarf_uleb128('return_address_register')) self.Dwarf_FDE_header = Struct('Dwarf_FDE_header', self.Dwarf_initial_length('length'), diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index a8e605f..ccb7e56 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -126,15 +126,15 @@ class ELFFile(object): # debug_sections = {} for secname in (b'.debug_info', b'.debug_abbrev', b'.debug_str', - b'.debug_line', b'.debug_frame', b'.debug_loc', - b'.debug_ranges'): + b'.debug_line', b'.debug_frame', + b'.debug_loc', b'.debug_ranges'): section = self.get_section_by_name(secname) if section is None: debug_sections[secname] = None else: debug_sections[secname] = self._read_dwarf_section( - section, - relocate_dwarf_sections) + section, + relocate_dwarf_sections) return DWARFInfo( config=DwarfConfig( @@ -144,6 +144,8 @@ class ELFFile(object): debug_info_sec=debug_sections[b'.debug_info'], debug_abbrev_sec=debug_sections[b'.debug_abbrev'], debug_frame_sec=debug_sections[b'.debug_frame'], + # TODO(eliben): reading of eh_frame is not hooked up yet + eh_frame_sec=None, debug_str_sec=debug_sections[b'.debug_str'], debug_loc_sec=debug_sections[b'.debug_loc'], debug_ranges_sec=debug_sections[b'.debug_ranges'], diff --git a/elftools/elf/relocation.py 
b/elftools/elf/relocation.py index 7c2b74c..4ae73d7 100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -202,6 +202,9 @@ class RelocationHandler(object): def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0): return sym_value + addend + def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0): + return sym_value + addend - offset + _RELOCATION_RECIPES_X86 = { ENUM_RELOC_TYPE_i386['R_386_NONE']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=False, calc_func=_reloc_calc_identity), @@ -218,6 +221,9 @@ class RelocationHandler(object): bytesize=8, has_addend=True, calc_func=_reloc_calc_identity), ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE( bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + ENUM_RELOC_TYPE_x64['R_X86_64_PC32']: _RELOCATION_RECIPE_TYPE( + bytesize=8, has_addend=True, + calc_func=_reloc_calc_sym_plus_addend_pcrel), ENUM_RELOC_TYPE_x64['R_X86_64_32']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), ENUM_RELOC_TYPE_x64['R_X86_64_32S']: _RELOCATION_RECIPE_TYPE( diff --git a/test/testfiles_for_readelf/gcc48-simple.o b/test/testfiles_for_readelf/gcc48-simple.o index 5eb04fa7328742143c35beaaceab2b68ac4a12ad..851a21ace8e5e24f76efa499caac01316a6f5b66 100644 GIT binary patch delta 904 zcma)4&ubGw6rS0cWPeOeb~WjwD5M7!gQUL{e_*2p6+G4=n1Z08Nob@|8bQU2f>c2~ zG<{Hd5$Y|7m!2wK`~yU&;K6@DD4xt+sPE0rmQs4~gWdP`d++;Z-p;J)KSRr>^sr>R zm@?TE8)QeiS6a_=__>1+j6V$rTyL4jos)6ceb3uWQauyCa-_iIP+Gbe-int}-oXS7 z@J%3Jf_OGV&RkgUv&bDY6~@rB#Mpm2ZuGwJ=)uEs{YkCUoG8yVTjkoLHq@PKu>XE%_8i*l1!VbLu@m@1S$+4@*g z$C$z=8LD9U8*tOrup>P_O+l*j(nbE>aA)Y6AjS>%!HixP&=rF#sSvi@f?u~CpP}!b&#%+ih&4N7 z9`g)TywlDF1^2xl+(&I*2&O2)f!7ckfSqCFbw@b!wco{07~j8_uw7+(ov7wmdHj@D)<&?uHaWNiv4*t%3Zjw GF8u@I*Ne>n delta 699 zcmZvY&nrYx6vyv>H6 z?qSbXmN_ry&!Gjb+%6l=|JA9W(Co+f#8ug=33(?`hlU%qfAg()<@t$pAYnI z<=1n)`NHDrR&FB?2_p>)##|`r2o)hq@=r)3JQ{aUw%Xv*jCT2ITK%44H}R9!*rSS% 
zLeBJ9J045aEL5yGotCVmSW>b{xH7!1!TOpmS!<*byKwyLx+Wd2mBWm5Sdk6|zJ&MHtg5b@QptZveTdKV^Gj}~;G$eyT;!4X|2j_6Qw`e}nAF1$F0 zu>fxP_^eeou0@H20PNR$?(gXsY$CCwqV^;fxZH~|Bq>Ehzora^xknjH^FH95gU>st Q3AcPG+NM+@N8Lt}FTA~H{r~^~ diff --git a/test/testfiles_for_readelf/gcc48-simple.src.c b/test/testfiles_for_readelf/gcc48-simple.src.c new file mode 100644 index 0000000..23849b3 --- /dev/null +++ b/test/testfiles_for_readelf/gcc48-simple.src.c @@ -0,0 +1,19 @@ +/* Generated by compiling with gcc 4.8 as follows: +** +** gcc-4.8 -O0 -g -fno-dwarf2-cfi-asm -c dwarf4_simple.c -o gcc48-simple. +** +** Note: -fno-dwarf2-cfi-asm to tell gcc to generate .dwarf_frames as well +** as the .eh_frames it generates by default. +** +*/ + +extern int bar(int); +extern int baz(int); + +int foo(int v) { + int x = bar(v); + int i; + for (i = 0; i < v; ++i) + x += bar(i) + bar(v) * baz(i); + return x; +} -- 2.30.2