From ba854b9c1849bdaef7dabdce5c48efdc6e7328d9 Mon Sep 17 00:00:00 2001 From: Will Denissen <85223780+WillDenissen@users.noreply.github.com> Date: Thu, 22 Sep 2022 22:11:30 +0200 Subject: [PATCH] Added Dwarfv5 CU headers (#442) Extended readelf.py Added test file compiled with -gdebug_types-section to readelf tests --- elftools/dwarf/descriptions.py | 5 +- elftools/dwarf/dwarfinfo.py | 21 ++++++--- elftools/dwarf/structs.py | 44 ++++++++++++++---- scripts/dwarfdump.py | 3 +- scripts/readelf.py | 30 +++++++++--- .../cuv5_x86-64_gcc.so.elf | Bin 0 -> 17768 bytes 6 files changed, 80 insertions(+), 23 deletions(-) create mode 100755 test/testfiles_for_readelf/cuv5_x86-64_gcc.so.elf diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index 90cbaa1..7db7f3f 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -187,6 +187,9 @@ _MACHINE_ARCH = None def _describe_attr_ref(attr, die, section_offset): return '<0x%x>' % (attr.value + die.cu.cu_offset) +def _describe_attr_ref_sig8(attr, die, section_offset): + return 'signature: 0x%x' % (attr.value) + def _describe_attr_value_passthrough(attr, die, section_offset): return attr.value @@ -257,7 +260,7 @@ _ATTR_DESCRIPTION_MAP = defaultdict( DW_FORM_block=_describe_attr_block, DW_FORM_flag_present=_describe_attr_present, DW_FORM_exprloc=_describe_attr_block, - DW_FORM_ref_sig8=_describe_attr_ref, + DW_FORM_ref_sig8=_describe_attr_ref_sig8, ) _FORM_CLASS = dict( diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 4edc7cd..96f33d9 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -60,7 +60,7 @@ DwarfConfig = namedtuple('DwarfConfig', class DWARFInfo(object): """ Acts also as a "context" to other major objects, bridging between - various parts of the debug infromation. + various parts of the debug information. """ def __init__(self, config, @@ -126,7 +126,9 @@ class DWARFInfo(object): # Cache for abbrev tables: a dict keyed by offset self._abbrevtable_cache = {} - + # Cache for program lines tables: a dict keyed by offset + self._linetable_cache = {} + # Cache of compile units and map of their offsets for bisect lookup. # Access with .iter_CUs(), .get_CU_containing(), and/or .get_CU_at(). self._cu_cache = [] @@ -494,15 +496,19 @@ class DWARFInfo(object): """ return 2 <= version <= 5 - def _parse_line_program_at_offset(self, debug_line_offset, structs): + def _parse_line_program_at_offset(self, offset, structs): """ Given an offset to the .debug_line section, parse the line program starting at this offset in the section and return it. structs is the DWARFStructs object used to do this parsing. """ + + if offset in self._linetable_cache: + return self._linetable_cache[offset] + lineprog_header = struct_parse( structs.Dwarf_lineprog_header, self.debug_line_sec.stream, - debug_line_offset) + offset) # DWARF5: resolve names def resolve_strings(self, lineprog_header, format_field, data_field): @@ -541,16 +547,19 @@ class DWARFInfo(object): for e in lineprog_header.file_names) # Calculate the offset to the next line program (see DWARF 6.2.4) - end_offset = ( debug_line_offset + lineprog_header['unit_length'] + + end_offset = ( offset + lineprog_header['unit_length'] + structs.initial_length_field_size()) - return LineProgram( + lineprogram = LineProgram( header=lineprog_header, stream=self.debug_line_sec.stream, structs=structs, program_start_offset=self.debug_line_sec.stream.tell(), program_end_offset=end_offset) + self._linetable_cache[offset] = lineprogram + return lineprogram + def parse_debugsupinfo(self): """ Extract a filename from either .debug_sup or .gnu_debualtlink sections. diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index fe68d43..8fa6b58 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -180,18 +180,46 @@ class DWARFStructs(object): self.Dwarf_sleb128 = SLEB128 def _create_cu_header(self): + dwarfv4_CU_header = Struct('', + self.Dwarf_offset('debug_abbrev_offset'), + self.Dwarf_uint8('address_size') + ) + # DWARFv5 reverses the order of address_size and debug_abbrev_offset. + # DWARFv5 7.5.1.1 + dwarfv5_CP_CU_header = Struct('', + self.Dwarf_uint8('address_size'), + self.Dwarf_offset('debug_abbrev_offset') + ) + # DWARFv5 7.5.1.2 + dwarfv5_SS_CU_header = Struct('', + self.Dwarf_uint8('address_size'), + self.Dwarf_offset('debug_abbrev_offset'), + self.Dwarf_uint64('dwo_id') + ) + # DWARFv5 7.5.1.3 + dwarfv5_TS_CU_header = Struct('', + self.Dwarf_uint8('address_size'), + self.Dwarf_offset('debug_abbrev_offset'), + self.Dwarf_uint64('type_signature'), + self.Dwarf_offset('type_offset') + ) + dwarfv5_CU_header = Struct('', + Enum(self.Dwarf_uint8('unit_type'), **ENUM_DW_UT), + Embed(Switch('', lambda ctx: ctx.unit_type, + { + 'DW_UT_compile' : dwarfv5_CP_CU_header, + 'DW_UT_partial' : dwarfv5_CP_CU_header, + 'DW_UT_skeleton' : dwarfv5_SS_CU_header, + 'DW_UT_split_compile' : dwarfv5_SS_CU_header, + 'DW_UT_type' : dwarfv5_TS_CU_header, + 'DW_UT_split_type' : dwarfv5_TS_CU_header, + }))) self.Dwarf_CU_header = Struct('Dwarf_CU_header', self.Dwarf_initial_length('unit_length'), self.Dwarf_uint16('version'), - # DWARFv5 reverses the order of address_size and debug_abbrev_offset. IfThenElse('', lambda ctx: ctx['version'] >= 5, - Embed(Struct('', - self.Dwarf_uint8('unit_type'), - self.Dwarf_uint8('address_size'), - self.Dwarf_offset('debug_abbrev_offset'))), - Embed(Struct('', - self.Dwarf_offset('debug_abbrev_offset'), - self.Dwarf_uint8('address_size'))), + Embed(dwarfv5_CU_header), + Embed(dwarfv4_CU_header), )) def _create_abbrev_declaration(self): diff --git a/scripts/dwarfdump.py b/scripts/dwarfdump.py index e593f1c..162af88 100644 --- a/scripts/dwarfdump.py +++ b/scripts/dwarfdump.py @@ -362,8 +362,7 @@ class ReadElf(object): self._emitline(".debug_info contents:") for cu in self._dwarfinfo.iter_CUs(): if cu.header.version >= 5: - ut = next(k for (k,v) in ENUM_DW_UT.items() if v == cu.header.unit_type) - unit_type_str = " unit_type = %s," % ut + unit_type_str = " unit_type = %s," % cu.header.unit_type else: unit_type_str = '' diff --git a/scripts/readelf.py b/scripts/readelf.py index 8a44282..bca7a5b 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -1092,12 +1092,23 @@ class ReadElf(object): self._format_hex(cu['unit_length']), '%s-bit' % cu.dwarf_format())) self._emitline(' Version: %s' % cu['version']) - if cu.header.get("unit_type", False): - ut = next((key for key, value in ENUM_DW_UT.items() if value == cu.header.unit_type), '?') - self._emitline(' Unit Type: %s (%d)' % (ut, cu.header.unit_type)) - self._emitline(' Abbrev Offset: %s' % ( - self._format_hex(cu['debug_abbrev_offset']))), - self._emitline(' Pointer Size: %s' % cu['address_size']) + if cu['version'] >= 5: + if cu.header.get("unit_type", ''): + unit_type = cu.header.unit_type + self._emitline(' Unit Type: %s (%d)' % ( + unit_type, ENUM_DW_UT.get(cu.header.unit_type, 0))) + self._emitline(' Abbrev Offset: %s' % ( + self._format_hex(cu['debug_abbrev_offset']))) + self._emitline(' Pointer Size: %s' % cu['address_size']) + if unit_type in ('DW_UT_skeleton', 'DW_UT_split_compile'): + self._emitline(' Dwo id: %s' % cu['dwo_id']) + elif unit_type in ('DW_UT_type', 'DW_UT_split_type'): + self._emitline(' Signature: 0x%x' % cu['type_signature']) + self._emitline(' Type Offset: 0x%x' % cu['type_offset']) + else: + self._emitline(' Abbrev Offset: %s' % ( + self._format_hex(cu['debug_abbrev_offset']))), + self._emitline(' Pointer Size: %s' % cu['address_size']) # The nesting depth of each DIE within the tree of DIEs must be # displayed. To implement this, a counter is incremented each time @@ -1151,9 +1162,16 @@ class ReadElf(object): return self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_line_sec.name) self._emitline() + lineprogram_list = [] for cu in self._dwarfinfo.iter_CUs(): + # Avoid dumping same lineprogram multiple times lineprogram = self._dwarfinfo.line_program_for_CU(cu) + + if lineprogram in lineprogram_list: + continue + + lineprogram_list.append(lineprogram) ver5 = lineprogram.header.version >= 5 cu_filename = bytes2str(lineprogram['file_entry'][0].name) diff --git a/test/testfiles_for_readelf/cuv5_x86-64_gcc.so.elf b/test/testfiles_for_readelf/cuv5_x86-64_gcc.so.elf new file mode 100755 index 0000000000000000000000000000000000000000..f86feadf56da366388f60dc0f0c308e1ea3f0a49 GIT binary patch literal 17768 zcmeHPU2G#+RxZb#PIj}?+tWR0W|-YsYiEEovv#|FTrSVBGwsCX#9GHr+s5Yh*u=wX(iMW45Pv#`#jzTQ+4eEESz*@lf zk65n=+IdBjRlJs$P%bU2AOO_HReUx-FRCaD;)csVl32Tw}h|6OW=@WjLZ|Ms22%a*mcNOjiTS^M6%R>Hsi z=Rf<$-}=_y|NQnBfSP#`-wkVC$IFtUB-^iB@c#9Jg2>|a8Ss~8z^#WzqX$+r+~1k( zSW&b)9!KNJ&e;T@(Z02{{+%c5-}%M={O-^G1^9Ds+yWb?Jdi4_tcS(gT+sxb(oK2QEEu z>48fRTzcSn9{BWso}LoTztSQE7q5*!R_jMz6E3UzXC;~SrYd>;^qQ*we-~@3tM3+; zv0}Vc2ug0rE;wboR(5T>u-qH&7uI(s5?*v~S+t&-v+m+su8??-fOSsm`l_Zs!QCBv z#9|JmwSmvPH7DeJ8mtW-{^i0Sk_&QQBAB(C{TIzbLD%n*khiHVcV6q{GK*Pz$S zxgWcBb18rAjcb3h^7_lU8w=N8eGGXErQ8Rv=dVctKOqozUi&82%!~7}+`=3M$31-J ztoKOk+<#qr3EyHD2ae0#J|CamJ{s*EpYIQ)&r5rtv&NJCz2d{O;r{TTbSPHI^Ixz& zGsh;sI)CF==JF79aD2Kmv2Gupo(yk)v_BjjjmN{=tEZ#A;ls)J_W1DlXcxugd{^`* zWQ%S^3>8-G>T|=WP&}EOkA`1;FdPlfj`mR9Iomt@>Z8Cf`d*><0RJDJ?~Zm(hGX20 zC!m=W_dnV>J3!UNN8A@Hh2p_xYZc$alPTgUf*pJ)=4vRVk3NSxMT$F$Xet(d z6kl*;{!4QUzrBnYxI$?lhwY30ps^Z71v{>M820+DZU;a-cvZXHj!|2KW@P(Ga)P-| z61Lv$55k6qp&zJ?KnN0*jaH}I1IUd*oAtr!dOQ3uY-_$)lP^Cn6MQ)q)LV~wVLPn% zL)}1q(A{XQ61ct{HVS1ytbqOSZnWAT6e=dLzIu17)e8%b2~<**(uy6FB)8z1l&jrN ze`_ODUVV792Ti0X#D~6bwmPjr7;V%C>ruDUz7K@Zti6e-lO0000IS|<{dCxEMy<{O zAlY7Dg4*5A8er1FCM*llU+?xvf>cORH{0F%l-NdhYo$$IoEEH6wl54M1@G*rCJddNDtft>4nIZ!JA!l2lQr(r33D7lGlz;M-K|; zqw&#$5ymZgOzDB!9P}EkW-}TXM2x#99HuZDNFALhPr8+bSm`%B1?{Mc#_yM?d!>l<_adQR`&w^@T%23^m zwoo@jen_{ZsaQG@#cgJyQ>{x&B<-5d^9>Qiu1WMU^T*0$qEl)lj7c=F6-@djLjz4YB`l@OY{HaQ zQ-m-MSz%^ur*TvKhdbj)O|F*vgWfIVIRg|$0w^9j6FeF|nq-j^+~ir?-j3=ktw@c= z1-op^VY@G~6{Q~xT#u>;qJ{oC2A3FiAfeQKCh!24AQuBv8besR&>8XO#K87RSCmJm+k>E@VE&6;RDWD-Vkxus}?8}GFm zm{P$=JAP{Q@jVtJxms}D})l8(#RN$STJiPdeX$}a4>-)oOMhX z7;Z>;4$TLK;X*iI$X{l=3`VXn!DK6=kR8$}c17%{xGBX*ZZX=@0ts$3#*$>v64Gdn zkzB@(<&h*-jI|9S*%4f4O7@PN_@Ny=&crfeo1uA@7=T%h5%gkEHEc@sr!+T97&us1 z++|WL!v`#keV{`YG;zpjsOD$U>fs>IvMbnNqo)#Hi)m+dVA4G?z2OAbH||&_lgVEP1 z5f|xdAp(RcErj2brW;)98jxtUyRq30w-u(@9z@}X^>(xo4%WN0?w2dWvo7En>(<9WmFyasxm)+7X=u$aN^yvAGb1+nXR-F4|5-4(^Pw%Npla z4HVM8Q*5P&DJV`3!ydJ zx7}gUtHk{4^?sye@D>F*fxxYf*xaBk4Y@>T1h!SeLDZ3JdtrWK1#uJiYemj22`as7 zS7I`=W6O@3wg~~i=#E-0x@Fb1Qn>EgvWgl8YZkXGJ6AND=v=!RH!r3J%gg(x=ex(l zTXv~htNJz9MyL9YYgek}!1gLm)q~qy>TkDFMK_m&wv01Y6miKioRWinsv=AR6xeTb z1Gii&JFZi;t1ddC?3I1=ctS?p=LtDw&-2S3LJNE4U?>M()$v`|_3gm+A;uXNZQoEO z(fwMISE_-w;(L_%ycXuUx>j!Bi{p_%J@WS zMuU;_70L((BkPM6bS%VqU%@p~ocV!ChJ()iiswbLzltwqDhv=w{DLDug4yi$3yuN_ z{zS)Ld?ZN7$mn@#S!UPAsLZ|>Ga%jfYD@qR;E7Q|f)bst$AuWDJJJ{#6gD3Ad2EOg znvF*XiAn~58XsaV>coo<3{92d6t@V0iw_A&?l=ZW!i5Hdq<};aTznWv3dxQFbla>b zP^io(z*Nkf8rW*z3Em=%3uXwUISLJ*sygqbK9sk=CtDP%)>q%cwiV>F|YD+reijW}5_ z-wS06*LlP%2EqTL`69(12gb!SMoL5?FkT>Mq{L*$1=%<&E*O^?7rvK>3p2^;xX_C^ zL2SlFB5g2M_irj~$fk5;m}!Gin$=7jgiOYUnm!_>FyoB0kv)M#8rH!f1iWzCNC}9e zQ})77v4 zu#Lw;-r4bOFSbr{v3Wpg`r&b>d^TEx%1NLVmBrMp^d!t z`iEiEzu#FETY2?f7}YzCsDH1%8QF^QIb_bo$oM47bEE}^GrQ2;s?e6N=qGm7T zqc|i+;l%&dz z91IMkCk;WDI1j|WOPyB|zdBAGH^Q#8JQ)Kqa(Lc3+1E#*#J!GcoXv|gN>(&ZsU!~c z1m!861U#Xv3MiaDi}>Idrm$)rZy@;qnC;rwtP{_ph{yg$)NI$+qVCpUa|@f?{OEmy zqxu`dGlF67bblC~;OjUVpU{y$Cf?W(3k5;c7iZVhsjq$*pRQ|e_vx&W#$fs@el^wr z`kB#$glkc=&-AFQapl42JUTuaoj=mzL`{*J7!!UzI(no?=vMI>P`taV1tP_YUGkMy zBWlv1Mm>&0t#tD6raX9v9EPZh)=B1@>M)}E)j^8+QVOVkc#z1q7}@V)_ZRC9fU$>) zT{GnPA6H1^zrlJhtT$x8&}qoo7mi5k_i;_CJdRQF`ROD)Z}GY5B>al?u{e2YLg@%* z{9dYw$>UrwzsH+|FIYd!l(VScPfpT*#;of5yv3)wHAvj#t?ZN1d5g}GPG5P8-in>R z@)nK|XIyzpy-c0LB~SbLZAG7c zFZf-+v)&{A0Pw8$h`%-i-kSmEqsmd^KJsa66=wr6h)3Z%#yjNKXuZ+1qP2E+1(U&U zvneuroz7#*!_CPzkLAnA^a8R~!ZG8?&MwNynJf?aZge^smhj}4cF&KF_lrmSmZ;%W z(4ke@e>?)GEGK7HX@YY=R%v(`9h~9hb9A^57}o^V&dJf9RoXi}IT_+?V@VuZj&{y= zMh}K#uJXBQt{;sKPWgUkclT`g3tU5*8S*{!7uwauK*Q#`k0^d2$zo(F||w20Q2*E z7qe3u#|AP#IMP67X&25y)6IrC5ykxgZM5|<*NcT3ER?dOvEhwn|g zuPFYs+%)a4&*1;M;-~mDuQdPf0GWmn{YQ%b6O%cE_V+Vj#8Ocy_*bc7T