From 97bf779459eb607920a374a392e4514ef5d56f38 Mon Sep 17 00:00:00 2001 From: Ronan Dunklau Date: Tue, 10 May 2022 15:56:32 +0200 Subject: [PATCH] Improve DWARF 5 compatibility. (#400) * Add support DW_FORM_implicit_const * Add support for DW_FORM_line_strp * Add new tests for DW_FORM_implicit_const and DW_FORM_linestrp. --- elftools/dwarf/die.py | 18 ++++-- elftools/dwarf/dwarfinfo.py | 10 +++- elftools/dwarf/structs.py | 5 +- elftools/elf/elffile.py | 7 ++- test/test_dwarf_v5_forms.py | 52 ++++++++++++++++++ test/test_refaddr_bitness.py | 1 + .../dwarf_v5_forms.debug | Bin 0 -> 5296 bytes 7 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 test/test_dwarf_v5_forms.py create mode 100755 test/testfiles_for_unittests/dwarf_v5_forms.debug diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index dd9d592..810cef4 100755 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -237,11 +237,18 @@ class DIE(object): # Guided by the attributes listed in the abbreviation declaration, parse # values from the stream. - for name, form in abbrev_decl.iter_attr_specs(): + for spec in abbrev_decl['attr_spec']: + form = spec.form + name = spec.name attr_offset = self.stream.tell() - raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream) - - value = self._translate_attr_value(form, raw_value) + # Special case here: the attribute value is stored in the attribute + # definition in the abbreviation spec, not in the DIE itself. 
+ if form == 'DW_FORM_implicit_const': + value = spec.value + raw_value = value + else: + raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream) + value = self._translate_attr_value(form, raw_value) self.attributes[name] = AttributeValue( name=name, form=form, @@ -258,6 +265,9 @@ class DIE(object): if form == 'DW_FORM_strp': with preserve_stream_pos(self.stream): value = self.dwarfinfo.get_string_from_table(raw_value) + elif form == 'DW_FORM_line_strp': + with preserve_stream_pos(self.stream): + value = self.dwarfinfo.get_string_from_linetable(raw_value) elif form == 'DW_FORM_flag': value = not raw_value == 0 elif form == 'DW_FORM_flag_present': diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 48c4bb5..a300e7b 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -73,7 +73,8 @@ class DWARFInfo(object): debug_pubtypes_sec, debug_pubnames_sec, debug_addr_sec, - debug_str_offsets_sec): + debug_str_offsets_sec, + debug_line_str_sec): """ config: A DwarfConfig object @@ -92,6 +93,7 @@ class DWARFInfo(object): self.debug_loc_sec = debug_loc_sec self.debug_ranges_sec = debug_ranges_sec self.debug_line_sec = debug_line_sec + self.debug_line_str_sec = debug_line_str_sec self.debug_pubtypes_sec = debug_pubtypes_sec self.debug_pubnames_sec = debug_pubnames_sec @@ -231,6 +233,12 @@ class DWARFInfo(object): """ return parse_cstring_from_stream(self.debug_str_sec.stream, offset) + def get_string_from_linetable(self, offset): + """ Obtain a string from the line string table section (.debug_line_str), given an offset + relative to the section. + """ + return parse_cstring_from_stream(self.debug_line_str_sec.stream, offset) + def line_program_for_CU(self, CU): """ Given a CU object, fetch the line program it points to from the .debug_line section. 
diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 16f29f6..79e0d8f 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -182,7 +182,9 @@ class DWARFStructs(object): obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null', Struct('attr_spec', Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT), - Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)))) + Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM), + If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const', + self.Dwarf_sleb128('value'))))) def _create_dw_form(self): self.Dwarf_dw_form = dict( @@ -208,6 +210,7 @@ class DWARFStructs(object): DW_FORM_string=CString(''), DW_FORM_strp=self.Dwarf_offset(''), + DW_FORM_line_strp=self.Dwarf_offset(''), DW_FORM_strx1=self.Dwarf_uint8(''), DW_FORM_strx2=self.Dwarf_uint16(''), # DW_FORM_strx3=self.Dwarf_uint24(''), # TODO diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 244841a..10367ad 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -217,7 +217,9 @@ class ELFFile(object): section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev', '.debug_str', '.debug_line', '.debug_frame', '.debug_loc', '.debug_ranges', '.debug_pubtypes', - '.debug_pubnames', '.debug_addr', '.debug_str_offsets') + '.debug_pubnames', '.debug_addr', + '.debug_str_offsets', '.debug_line_str') + compressed = bool(self.get_section_by_name('.zdebug_info')) if compressed: @@ -230,7 +232,7 @@ class ELFFile(object): debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name, debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name, debug_pubnames_name, debug_addr_name, debug_str_offsets_name, - eh_frame_sec_name) = section_names + debug_line_str_name, eh_frame_sec_name) = section_names debug_sections = {} for secname in section_names: @@ -263,6 +265,7 @@ class ELFFile(object): debug_pubnames_sec=debug_sections[debug_pubnames_name], debug_addr_sec=debug_sections[debug_addr_name], 
debug_str_offsets_sec=debug_sections[debug_str_offsets_name], + debug_line_str_sec=debug_sections[debug_line_str_name] ) def has_ehabi_info(self): diff --git a/test/test_dwarf_v5_forms.py b/test/test_dwarf_v5_forms.py new file mode 100644 index 0000000..efae53c --- /dev/null +++ b/test/test_dwarf_v5_forms.py @@ -0,0 +1,52 @@ +# The dwarf_v5_forms.debug file was generated as follows, using gcc 11.2.0 on +# an x86_64 machine. +# $ cat dwarf_v5_forms.c +# int main() +# { +# char ** val; +# return 0; +# } +# $ gcc -O0 -gdwarf-5 dwarf_v5_forms.c -o dwarf_v5_forms.debug +# $ strip --only-keep-debug dwarf_v5_forms.debug +import unittest +import os + + +from elftools.elf.elffile import ELFFile + +class TestDWARFV5_forms(unittest.TestCase): + + def test_DW_FORM_implicit_const(self): + path = os.path.join('test', 'testfiles_for_unittests', + 'dwarf_v5_forms.debug') + with open(path, 'rb') as f: + elffile = ELFFile(f) + dwarfinfo = elffile.get_dwarf_info() + # File is very small, so load all DIEs. + dies = [] + for cu in dwarfinfo.iter_CUs(): + dies.extend(cu.iter_DIEs()) + # Locate the "val" variable DIE. 
+ for die in dies: + # There should be only one + if (die.tag == "DW_TAG_variable" and + die.attributes["DW_AT_name"].value == b'val'): + # In the dwarfinfo, it's type is sized using a + # DW_FORM_implicit_const: check it is parsed correctly + break + dietype = cu.get_DIE_from_refaddr(die.attributes["DW_AT_type"].value) + byte_size_attr = dietype.attributes["DW_AT_byte_size"] + self.assertEqual(byte_size_attr.form, "DW_FORM_implicit_const") + self.assertEqual(byte_size_attr.value, 8) + + def test_DW_FORM_linestrp(self): + path = os.path.join('test', 'testfiles_for_unittests', + 'dwarf_v5_forms.debug') + with open(path, 'rb') as f: + elffile = ELFFile(f) + dwarfinfo = elffile.get_dwarf_info() + cu = next(dwarfinfo.iter_CUs()) + top_die = cu.get_top_DIE() + name_attr = top_die.attributes["DW_AT_name"] + self.assertEqual(name_attr.form, "DW_FORM_line_strp") + self.assertEqual(name_attr.value, b"dwarf_v5_forms.c") diff --git a/test/test_refaddr_bitness.py b/test/test_refaddr_bitness.py index b9e8874..ea01db6 100644 --- a/test/test_refaddr_bitness.py +++ b/test/test_refaddr_bitness.py @@ -46,6 +46,7 @@ class TestRefAddrOnDWARFv2With64BitTarget(unittest.TestCase): debug_pubnames_sec = None, debug_addr_sec=None, debug_str_offsets_sec=None, + debug_line_str_sec=None, ) CUs = [cu for cu in di.iter_CUs()] diff --git a/test/testfiles_for_unittests/dwarf_v5_forms.debug b/test/testfiles_for_unittests/dwarf_v5_forms.debug new file mode 100755 index 0000000000000000000000000000000000000000..4f6a402ea20244b28a0723996ec862b202106d44 GIT binary patch literal 5296 zcmb_gU2GIp6h5=tLSgwStwIT!4p_AqyDbPNp@$->}+j` zQC|FcF+m^n1rwrAKJrc?F~0a9F(gKOAkhb32vK=3l9-_9%$&2Gz1z$V~EvnI}?smwB90PZ}d#!#sao=9nl_=PfZ$m*dP6 zNja#U{x!7!OTLrLlliupQ8*A8TPy1>`CdjI)#YKEEbA%G_n>vS$@P@we^4G8*AQN` zrzU64K-t~s_uVMpzVp^^wY#4Vj*$<| zgJO~oHNxnkHaRwS(Co*a-R7P>*}d5zcoxxkL&fX6eM{sBnOLm42Z*$T5jjV^$H|3A z&6w$f*kKs$uQlm6E)m+cXqCZ8ueVW$_O^D{3&C(vH$1$pG1eTV4r4Q9c6Lm4<5;&C zPj2eYqp8#OK<~DFknWHUjNb>D%#lnQ 
zN56(ude~mVs~tMv8OmfBe{b4If@+8tD!m5HFJ<(mHp{a|3!#a{vngCO z1IufU?RlOnYz|byn&%uYI-V1_1vIUoP&&Lcd|=?fK69XGzi9>Y1N+T^*&(P{t_OvZ z6+ncl#r^P%sTt2mBUjmF;wcmL7*=~y8gT=*L4OVvY&33a3NHh?V2IDN z2*lfacv1XL-l>9rjEaFfQO-@lub|Rw{H^Bobrh!)7EFn-BAdneKJxDd{k;7tqHSJv zPTh(89|g!o4)t3KC*D=JR+L`E%@wn*y%^zJp(j^|BIOX`ov^iDPw5>+yeqX4`n0+o zXMa7;nJB)nSEP;jmef`-b1N8W=LiRUp3E4RGv@;0ow&Y3xfqxG4B}n5jwbJir?}%? z_Lu!2{vWZwJaTeEeS$cRNB936aZ|_%c@^I8gUdfiWT4Ekd(o$`l@7k zHnFB=vBP#fRjWES2J!Hw^3^JHqf=5T%p^0SAxQBX2cQZ|7Nxh!?s=y^?7i9j5Y-JJ zTV1Y%Rvu*-#IhtBzKWb+0kYBB7rk0`A@CQRAY4vB`I=j{2V9$>k(coEXiNs0ZRT4*iVFB|8pAup2nA-i86j!Cy{S!{0{z6Ci#aru|9qo&xaa+ zSmR$~N6D|x|5N5mfBpTy?+MzU$)>O0XV^(Gg)i?riPNSPOn?1+q4DeQZ+6_6Opdyq z|4ZiAzb#BAFkklcN?biK&@H@J=_cH&R&`u>-FxehVe`>592AG~pH$w0YBGo$|& Qn(@C|9}`~H2$~810$n}c_W%F@ literal 0 HcmV?d00001 -- 2.30.2