From 82299758cc0c0ca788de094ee2d83f6f490a8ef4 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Fri, 23 Feb 2018 05:28:51 -0800 Subject: [PATCH] Convert all ascii decoding to utf-8 decoding (#182) * Convert all ascii-decoding to utf-8 decoding * Add testcase for unicode symbols --- elftools/elf/sections.py | 6 ++-- elftools/elf/segments.py | 2 +- elftools/elf/structs.py | 2 +- test/test_encoding.py | 30 ++++++++++++++++++ .../unicode_symbols.elf | Bin 0 -> 8232 bytes 5 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 test/test_encoding.py create mode 100755 test/testfiles_for_unittests/unicode_symbols.elf diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py index c2d9220..20e9056 100644 --- a/elftools/elf/sections.py +++ b/elftools/elf/sections.py @@ -137,7 +137,7 @@ class StringTableSection(Section): """ table_offset = self['sh_offset'] s = parse_cstring_from_stream(self.stream, table_offset + offset) - return s.decode('ascii') if s else '' + return s.decode('utf-8') if s else '' class SymbolTableSection(Section): @@ -299,13 +299,13 @@ class ARMAttribute(object): elif self.tag in ('TAG_CPU_RAW_NAME', 'TAG_CPU_NAME', 'TAG_CONFORMANCE'): self.value = struct_parse(structs.Elf_ntbs('value', - encoding='ascii'), + encoding='utf-8'), stream) elif self.tag == 'TAG_COMPATIBILITY': self.value = struct_parse(structs.Elf_uleb128('value'), stream) self.extra = struct_parse(structs.Elf_ntbs('vendor_name', - encoding='ascii'), + encoding='utf-8'), stream) elif self.tag == 'TAG_ALSO_COMPATIBLE_WITH': diff --git a/elftools/elf/segments.py b/elftools/elf/segments.py index c1c0279..16560bc 100644 --- a/elftools/elf/segments.py +++ b/elftools/elf/segments.py @@ -92,7 +92,7 @@ class InterpSegment(Segment): """ path_offset = self['p_offset'] return struct_parse( - CString('', encoding='ascii'), + CString('', encoding='utf-8'), self.stream, stream_pos=path_offset) diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py index 2862454..a89bfed 100644 --- a/elftools/elf/structs.py +++ b/elftools/elf/structs.py @@ -384,7 +384,7 @@ class ELFStructs(object): self.Elf_Attr_Subsection_Header = Struct('Elf_Attr_Subsection', self.Elf_word('length'), self.Elf_ntbs('vendor_name', - encoding='ascii') + encoding='utf-8') ) # Structure of a build attribute tag. diff --git a/test/test_encoding.py b/test/test_encoding.py new file mode 100644 index 0000000..307a560 --- /dev/null +++ b/test/test_encoding.py @@ -0,0 +1,30 @@ +# coding: utf-8 +#------------------------------------------------------------------------------- +# elftools tests +# +# Audrey Dutcher (audrey@rhelmot.io) +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- + +from __future__ import unicode_literals +import unittest +import os + +from elftools.elf.elffile import ELFFile + +class TestUnicodeSymbols(unittest.TestCase): + """Test that we can handle a unicode symbol as produced by clang""" + + def test_delta(self): + fname = os.path.join('test', 'testfiles_for_unittests', + 'unicode_symbols.elf') + + with open(fname, 'rb') as f: + elf = ELFFile(f) + symtab = elf.get_section_by_name('.symtab') + list(symtab.iter_symbols()) # this used to just fail + self.assertEqual(len(symtab.get_symbol_by_name('Δ')), 1) + +if __name__ == '__main__': + unittest.main() diff --git a/test/testfiles_for_unittests/unicode_symbols.elf b/test/testfiles_for_unittests/unicode_symbols.elf new file mode 100755 index 0000000000000000000000000000000000000000..3872c62cd8bce0223f817f5643e0e87ee630ce02 GIT binary patch literal 8232 zcmeHMU2Ggz6+XM`ztqWk6C7-kRv93X9FW&PaU4TbGHZLCF*rYUYzhKryz#E>75ig$ zXNz3~q%tj8g+L^zRNf*GQiY((Lsc0bh$Dw4PgN>}gw!Hg6&9wn)uMux2b$%aJLl}q zP9}hO;00f4=A84LpZhy^&%OJt=-7zg=L0Ez_zaK~s?!x@zX3MemB`ot>;N4egGZqO zk$QY|r6Fo_Lho^Cp-pTTKw5{-fZqlN{JLU8my5&?qKHp8^9(hk`E0mX^_$|8?a9-VswSFm2}OX+zu*#A89QEt%j1CmE!&J1dpxWN z^TN9k*F5Y2`#D41>W76~GGonxX%b}Stys=7)3Ia*PL7QZ4w=2--tbY>&ZV;%)|wR1 zulnu7UQj<|+zb1XJ4kns>nycQD~{&SIZpQ+IE|H*_RzFq8}UtXr5MZaG#QucAKR&c z@$9PQFW)tmZ#Aun8p=oapsIYR72hofbmT~$C;gw-%jI&z`$%+{4YwTlHZ=sRDsD{urHGYRWgMfBapM+$QMaT2Aj8Xbhq|$8%d5Vk z&0X$Oz;)RC{e2vltzz^ZwsZMb2%Ay~e>L)C^uI6BL_c5FxlX;fkkXvlz>Pu^qhZam+W1n!|uR~s85i7Fc|v2e=ykoy+Aam zU8x%kc6_(q2zEc$U<3!28&3uGTyP)~?2ZIG27?;fq5WX6NquKONB6rJ$N4=`$nCN{ z0^1|7Jp$V!uss6*?<25|`5cw&OeL2Tx>ITKcQl|YZWqfhbN2KRmgPLPpJh1*?GrgT ztvLGqry}|~TZBcwuDHM7FK3Cl=GXCEJ?m*)zvKz4ejT*{kEl1m38s|yQ)#ye>}4vX zl0##zM@4c2v6uX&QE36%n67aBf?r};`a$OhDwx!(BJnW&Q2m)_drZBQ>|?tU%hRl9 z_F?peou?pHTJoa>V4iuDhkD^>dpi5I4l6d7?;4MeMI+~;W?y$t*K9HqOC5Gs;*tY+!Wr=EE|kuY{>pjUsSKcI z(bK-JvaV|F>tWT?z5!(2)!H|L*L($VRm(ir+V22aFSYn1AnU3YZ?1fAwfIgbd90HF z?5f;*ahI+X$VFdmq_?At)|#M`0I6R+%2mGHEV>)@~D3cw8QcYYjC-L?MddW7%=7yePk zW&h+ba10qA?n2TYs*lPDK0c4}Nh412Wj`icDognIJjVG2#J$@8uzp=radyKy9`ooG z=OwhKn-cdt-cWHKoJTrWs+$$p{Jx>^2iMhmi2FR|w+=h72giYK9jM6ud=7C9cVqXw zw;~=wT*g^B;VY=(Jh=Y%D_pFqvDivsL1b9??ddLK%DAzx92?Knu<{SN+Z78 z-v%A7_4zd6Q2idI-Xh{O4s`88MV*C_{tBNEt}D7eah509KE&%&&g?%zob27>^Ha7j zvAsHrW5AywPVIEZ|ADgi*q3HGIB0DVNOH_$D$Nf-3ZR)3#!RrCgL$G#`lG~?z|p+?T2Dry+5qA z!0|$7`vMVdYi<)!m(QB>u}nOb0CV&VTE&wYvye|-@*XRy{R z#==)vRbU6h-a{CdSsIs)^F#SBdCF_?6R7M*l8?y$7n0XXc(QetkE1}jqwHrBejTOi z{{xzmsxR3G1UZUKJwDN|Eugj z$N?qqTVa2(BY(_#9vHu`BSzy&?V}>+vAgUq=M-C8{rwX8DP)}fl1G+Y{TF-rpF-#} zE`Rx*3Go8^F$H%N@e}Af$T;hly!2%rFgZ7gy{uc2udu(2zvQp0>`&j8v$*}AM