From 4384ad8928ad0167f8cc543056433080decef9cb Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Thu, 27 May 2021 09:38:35 -0400 Subject: [PATCH] dwarf: initial DWARFv5 support (#363) * dwarf: initial DWARFv5 support * dwarf/structs: use Embed to select header layout * dwarf/structs: DW_FORM_strx family Not sure how best to handle 24-bit values yet. * dwarf/structs: use IfThenElse `If` alone wraps the else in a `Value`. * dwarf/structs: DW_FORM_addrx family handling * dwarf_expr: support DW_OP_addrx Not complete, but gets readelf.py to the end of a single binary. * dwarf/constants: DW_UT_* constants * dwarf/structs: fix some DW_FORMs * elftools, test: plumbing for DWARFv5 sections * dwarf/constants: fix typo * dwarf/structs: re-add a comment that got squashed * dwarf/structs: DWARFv5 table header scaffolding * dwarf/constants: typo * test: add a basic DWARFv5 test --- elftools/dwarf/constants.py | 15 ++++++ elftools/dwarf/dwarf_expr.py | 1 + elftools/dwarf/dwarfinfo.py | 6 ++- elftools/dwarf/structs.py | 43 ++++++++++++++++-- elftools/elf/elffile.py | 11 +++-- test/test_dwarf_v5.py | 21 +++++++++ test/test_refaddr_bitness.py | 4 +- .../testfiles_for_unittests/dwarfv5_basic.elf | Bin 0 -> 17568 bytes 8 files changed, 90 insertions(+), 11 deletions(-) create mode 100644 test/test_dwarf_v5.py create mode 100755 test/testfiles_for_unittests/dwarfv5_basic.elf diff --git a/elftools/dwarf/constants.py b/elftools/dwarf/constants.py index d1a86fc..6542095 100644 --- a/elftools/dwarf/constants.py +++ b/elftools/dwarf/constants.py @@ -207,3 +207,18 @@ DW_CFA_val_offset = 0x14 DW_CFA_val_offset_sf = 0x15 DW_CFA_val_expression = 0x16 DW_CFA_GNU_args_size = 0x2e + + +# Compilation unit types +# +# DWARFv5 introduces the "unit_type" field to each CU header, allowing +# individual CUs to indicate whether they're complete, partial, and so forth. +# See DWARFv5 3.1 ("Unit Entries") and 7.5.1 ("Unit Headers"). 
+DW_UT_compile = 0x01 +DW_UT_type = 0x02 +DW_UT_partial = 0x03 +DW_UT_skeleton = 0x04 +DW_UT_split_compile = 0x05 +DW_UT_split_type = 0x06 +DW_UT_lo_user = 0x80 +DW_UT_hi_user = 0xff diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py index bb85daa..07c6fa1 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -197,6 +197,7 @@ def _init_dispatch_table(structs): return lambda stream: [struct_parse(structs.Dwarf_uleb128(''), stream), read_blob(stream, struct_parse(structs.Dwarf_uint8(''), stream))] add('DW_OP_addr', parse_op_addr()) + add('DW_OP_addrx', parse_arg_struct(structs.Dwarf_uleb128(''))) add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8(''))) add('DW_OP_const1s', parse_arg_struct(structs.Dwarf_int8(''))) add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16(''))) diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 103fc49..48c4bb5 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -71,7 +71,9 @@ class DWARFInfo(object): debug_ranges_sec, debug_line_sec, debug_pubtypes_sec, - debug_pubnames_sec): + debug_pubnames_sec, + debug_addr_sec, + debug_str_offsets_sec): """ config: A DwarfConfig object @@ -434,7 +436,7 @@ class DWARFInfo(object): def _is_supported_version(self, version): """ DWARF version supported by this parser """ - return 2 <= version <= 4 + return 2 <= version <= 5 def _parse_line_program_at_offset(self, debug_line_offset, structs): """ Given an offset to the .debug_line section, parse the line program diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 9513053..16f29f6 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -11,7 +11,7 @@ from ..construct import ( UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64, SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64, Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray, - CString, Embed, StaticField + 
CString, Embed, StaticField, IfThenElse ) from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128 from .enums import * @@ -89,7 +89,7 @@ class DWARFStructs(object): section 7.5.1) """ assert dwarf_format == 32 or dwarf_format == 64 - assert address_size == 8 or address_size == 4 + assert address_size == 8 or address_size == 4, str(address_size) self.little_endian = little_endian self.dwarf_format = dwarf_format self.address_size = address_size @@ -138,6 +138,8 @@ class DWARFStructs(object): self._create_callframe_entry_headers() self._create_aranges_header() self._create_nameLUT_header() + self._create_string_offsets_table_header() + self._create_address_table_header() def _create_initial_length(self): def _InitialLength(name): @@ -160,8 +162,16 @@ class DWARFStructs(object): self.Dwarf_CU_header = Struct('Dwarf_CU_header', self.Dwarf_initial_length('unit_length'), self.Dwarf_uint16('version'), - self.Dwarf_offset('debug_abbrev_offset'), - self.Dwarf_uint8('address_size')) + # DWARFv5 reverses the order of address_size and debug_abbrev_offset. 
+ IfThenElse('', lambda ctx: ctx['version'] >= 5, + Embed(Struct('', + self.Dwarf_uint8('unit_type'), + self.Dwarf_uint8('address_size'), + self.Dwarf_offset('debug_abbrev_offset'))), + Embed(Struct('', + self.Dwarf_offset('debug_abbrev_offset'), + self.Dwarf_uint8('address_size'))), + )) def _create_abbrev_declaration(self): self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry', @@ -177,6 +187,11 @@ class DWARFStructs(object): def _create_dw_form(self): self.Dwarf_dw_form = dict( DW_FORM_addr=self.Dwarf_target_addr(''), + DW_FORM_addrx=self.Dwarf_uleb128(''), + DW_FORM_addrx1=self.Dwarf_uint8(''), + DW_FORM_addrx2=self.Dwarf_uint16(''), + # DW_FORM_addrx3=self.Dwarf_uint24(''), # TODO + DW_FORM_addrx4=self.Dwarf_uint32(''), DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8), DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16), @@ -193,6 +208,10 @@ class DWARFStructs(object): DW_FORM_string=CString(''), DW_FORM_strp=self.Dwarf_offset(''), + DW_FORM_strx1=self.Dwarf_uint8(''), + DW_FORM_strx2=self.Dwarf_uint16(''), + # DW_FORM_strx3=self.Dwarf_uint24(''), # TODO + DW_FORM_strx4=self.Dwarf_uint32(''), DW_FORM_flag=self.Dwarf_uint8(''), DW_FORM_ref=self.Dwarf_uint32(''), @@ -233,6 +252,22 @@ class DWARFStructs(object): self.Dwarf_length('debug_info_length') ) + def _create_string_offsets_table_header(self): + self.Dwarf_string_offsets_table_header = Struct( + "Dwarf_string_offets_table_header", + self.Dwarf_initial_length('unit_length'), + self.Dwarf_uint16('version'), + self.Dwarf_uint16('padding'), + ) + + def _create_address_table_header(self): + self.Dwarf_address_table_header = Struct("Dwarf_address_table_header", + self.Dwarf_initial_length('unit_length'), + self.Dwarf_uint16('version'), + self.Dwarf_uint8('address_size'), + self.Dwarf_uint8('segment_selector_size'), + ) + def _create_lineprog_header(self): # A file entry is terminated by a NULL byte, so we don't want to parse # past it. Therefore an If is used. 
diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 779ae3b..acddc89 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -212,7 +212,7 @@ class ELFFile(object): section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev', '.debug_str', '.debug_line', '.debug_frame', '.debug_loc', '.debug_ranges', '.debug_pubtypes', - '.debug_pubnames') + '.debug_pubnames', '.debug_addr', '.debug_str_offsets') compressed = bool(self.get_section_by_name('.zdebug_info')) if compressed: @@ -224,7 +224,8 @@ class ELFFile(object): (debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name, debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name, debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name, - debug_pubnames_name, eh_frame_sec_name) = section_names + debug_pubnames_name, debug_addr_name, debug_str_offsets_name, + eh_frame_sec_name) = section_names debug_sections = {} for secname in section_names: @@ -253,8 +254,10 @@ class ELFFile(object): debug_loc_sec=debug_sections[debug_loc_sec_name], debug_ranges_sec=debug_sections[debug_ranges_sec_name], debug_line_sec=debug_sections[debug_line_sec_name], - debug_pubtypes_sec = debug_sections[debug_pubtypes_name], - debug_pubnames_sec = debug_sections[debug_pubnames_name] + debug_pubtypes_sec=debug_sections[debug_pubtypes_name], + debug_pubnames_sec=debug_sections[debug_pubnames_name], + debug_addr_sec=debug_sections[debug_addr_name], + debug_str_offsets_sec=debug_sections[debug_str_offsets_name], ) def has_ehabi_info(self): diff --git a/test/test_dwarf_v5.py b/test/test_dwarf_v5.py new file mode 100644 index 0000000..0468d07 --- /dev/null +++ b/test/test_dwarf_v5.py @@ -0,0 +1,21 @@ +import os +import unittest + +from elftools.elf.elffile import ELFFile + + +class TestDWARFv5(unittest.TestCase): + def test_dwarfv5_parses(self): + dwarfv5_basic = os.path.join('test', 'testfiles_for_unittests', 'dwarfv5_basic.elf') + with open(dwarfv5_basic, 'rb') as f: + elf = ELFFile(f) + # 
DWARFv5 debugging information is detected. + self.assertTrue(elf.has_dwarf_info()) + + # Fetching DWARFInfo for DWARFv5 doesn't completely explode. + dwarf = elf.get_dwarf_info() + self.assertIsNotNone(dwarf) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_refaddr_bitness.py b/test/test_refaddr_bitness.py index 85fc56d..b9e8874 100644 --- a/test/test_refaddr_bitness.py +++ b/test/test_refaddr_bitness.py @@ -43,7 +43,9 @@ class TestRefAddrOnDWARFv2With64BitTarget(unittest.TestCase): debug_ranges_sec = None, debug_line_sec = None, debug_pubtypes_sec = None, - debug_pubnames_sec = None + debug_pubnames_sec = None, + debug_addr_sec=None, + debug_str_offsets_sec=None, ) CUs = [cu for cu in di.iter_CUs()] diff --git a/test/testfiles_for_unittests/dwarfv5_basic.elf b/test/testfiles_for_unittests/dwarfv5_basic.elf new file mode 100755 index 0000000000000000000000000000000000000000..4a9363ca6e1d8faf7c1ccb56144418c6c8b7fb35 GIT binary patch literal 17568 zcmeHOZ)_aJ6`$Sp#Rezma}pYpK)gU)OdDV9kQhTqvgg>&xpG39KcuvT&H3(ZUpe1d zcY6>c+NMp4axKtQf{OMNRr?`TQPocss@jq$Q36swnEq2FRFwj0Lra6xHZ7vS_4j7y zo%hz~f>fyz>ON`r&HU#5-pst6-JQAH`NH7HkgjQhMHgEH<*bM$A@?lA)0<>gVSS=m zSYnO1Nh|`=fWwj{q}>$j3EeGV-K6XcXyTnvmSoojCq?PyCgn(5HTq7O6M{AK zJ_#PR^MeY9eYdJ_p}5Sd{;^hBu`Zh#?cK00o9fDDa-~CEhx&TEdN)Li`RIDt@8xI+ zjj8QB_lec#pZrG4KY#Iz`D*5zonE$d@`R?r8c?K zRZleZg8ISnF0IU5MWr{YI%l_9GDX*p`%b`H9-QLB(q>sQ*@!dw4J3#_5P={9K?H&b z1Q7@#5JVt|KoEfsZv_6ncwfByO8m%cv%B^VpF26zYKi!{QwOCw*9}#<=1buH z{W#g59*efLAC>NpKiiE3Mtn1!on_AwMoqZtIRYv#j~zcfHfK!r|4iC{@sd>K+4!kT zTjQtRGUD3V_>1p3tyK?=%7dRz^&j565p5BrW&7gg3l|?kFV2Q;0-_x_J+D5p|MOSo z+CUV<%PfEHop`x48-K3Bw5ZkM;w$e}Dm^ch2j3_^kMNlXPPeoV2toYV{*lKve`l=_ zvAc)MzlrS|E?Zp3EdO2n$Z0Kp`)^BsqU-aS1F?H!2V(cdY;PZBe8D3vuWR*6lD9yD2m}!bA`nC% zh(Hj5AOb-If(Qf=2qF+f;QuuO8vX^0r?c7oTJyntA)C4ZfzVbNoBE*XKf=YcmCD1= zYoYf--wREv|LZPSD&K`3fj$HM6X?bGgOL7*CD9(*Ewn=sZPnt&rsKfr|H$ib%)D8t z(BfMRG)4&P#_UN2uFs(ZJ%gqez57V*t&Al$8KG7Bf;b=i(?Ay 
z`8WZwaO4qv+mc0Ns1RB5@hFbNC{II+p>X7D`f#}IabqxSKGrZ8?tC;f5bl0tQ9Rst zq;Y%L%7y!4;qF+tvp;P1huiwYk^XR#Z1=A~KLJ17MsgQO5P={9K?H&b1Q7@#5JVt| zKoEf-0zm{m>=7{WUoFY3`aYV!`DDFVmg1z!m#F-C#k*GJ`8(%TD$n03uT=TAEtX8t zI*x?Pue?{u%e-Nca=+PzKBn^g&2gv7UoEYK*sL^tk4w_#Dshd{tg7F(;{Rn5U7qLc zdHqV?yOOMMl`tDDnPPla)z9swbu0-!ww1#5;~!nwpBZJ3KbvJ&Lfm+jfW9xo@${`f8*Xu7k<2D?ko&j$*D!c*a1{D zG=v&LyO9X(r(EMAd;s05b#yEX86AZckrk^tT30PyRK$hUqZ%cxHlyQ?=1t8vUpu^P zE5@r461swhm%+mtVRUS1ZpKIowFpAQr^!2-bQ`p$Y5xi@Aw`!E2NLb_Y8qO*-V$1^ z$ss!X{a3&D+Ba8!yBO=)KlsZ^Lj#6}P9H*$EDz~dUkGVUoAoBp-zQKL^y^GCDb{_q zl)GQZPr)-eMb4YZi=Yi}h6eu?7yzZP#4_3j%3mx}MVTKlU6 z-?z2+)z$mE7RMNnAMu`B;m$2;N6hi$|*aFlzBuvCFi)kW3rc zX~550Z=oTU34YGj;>$(7``*CQlJ^JH+J6Lt#qvTjZQ$u>*2UWdeV^kOL-5<7w!{$p zR`BEe_VDBU*6`!k*8(bMh}8?=bZ5*LLzweGy2Bg9<%++6?mJy95r=)_A*}Ep`tTbR z&hJi zdVYQ&O_t6ZZ_feO@T;(Yygje%{p0o}iO(PZm!zNhs*Li*>3r^7~;j6x+NTmzu@l4T47i?$JPGv7sm3rR?16iB#&!&Et1Xecv;bjanLQX+9^wPLg}r^` zuKw7Feb>;?p25BL-dO+0AbFL0yOPC{Y_i<&Wmz@*yV9wIlVHQSO<+DNo2=YjV6d&( z5{57q^Y%m{m&&GvJ$x6eQkk4xDyGqix%#Pl!cJxFLOPpICY&^Rb23SLDg&B5S}ZE} za^7Lvdv|P8p%KyI^rVv*g?0+Ap5PSbC+WhJi01N6Iy#;!MMp~+?A*(w6cpTH6QbNzIs;i zxjt?`+p)eMd|E?tp69hQif_tV-T!L6?N_jP2nmWQ^LhR|tN6Tb<@&ij-2bDB->)3< zI@nYfitUJxxAVOCJr0c8B^O1|g6Bi@=P$%R!&7eELY&JuynIvfO~p5>mTp49c!#<2 z1eBN0?>kEwJV}KfVID)@Lc+`EdHnfS*MfilV0(Z2pH_U1e|5fZx{Rf+s-FG)6DXnj znLoEb+ojxDWx5GJ|3`qmd|p57GF|(NuBx8-`ZI76i7*Z|T&n8~-oNd)=RE75gNs>A zmCx&uwkSXd+qt>9Jmcq4Kx2pN=XoUWkDpQ3kH;7D_&WX`Try!kuWx46goo{5B8B;^ z{{Ym>udai-U3=42)id*0zljnrpXY_9;xAM`JG_Df`QrBTJgZyrxgNHsy1eo>GK4Af zY7z74hZV0dpS6Jmr@eO5%Hh3U5NW?{%L4pgEB>zc;a|T1zoW+`((~0T{`zlGe1HFs zDE^?Aj5PCEZ%}-nnRs6DBfbLWF>EcsKd<xN1o3#{=Q{nK$z#vUpXzgSFK|Vw(3iy;rau4nJ6uB2NAP9EzX5%VSN{M2 literal 0 HcmV?d00001 -- 2.30.2