From: Audrey Dutcher
Date: Fri, 23 Feb 2018 13:28:51 +0000 (-0800)
Subject: Convert all ascii decoding to utf-8 decoding (#182)
X-Git-Tag: v0.25~11
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=82299758cc0c0ca788de094ee2d83f6f490a8ef4;p=pyelftools.git

Convert all ascii decoding to utf-8 decoding (#182)

* Convert all ascii-decoding to utf-8 decoding

* Add testcase for unicode symbols
---

diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py
index c2d9220..20e9056 100644
--- a/elftools/elf/sections.py
+++ b/elftools/elf/sections.py
@@ -137,7 +137,7 @@ class StringTableSection(Section):
         """
         table_offset = self['sh_offset']
         s = parse_cstring_from_stream(self.stream, table_offset + offset)
-        return s.decode('ascii') if s else ''
+        return s.decode('utf-8') if s else ''
 
 
 class SymbolTableSection(Section):
@@ -299,13 +299,13 @@ class ARMAttribute(object):
 
         elif self.tag in ('TAG_CPU_RAW_NAME', 'TAG_CPU_NAME', 'TAG_CONFORMANCE'):
             self.value = struct_parse(structs.Elf_ntbs('value',
-                                                       encoding='ascii'),
+                                                       encoding='utf-8'),
                                       stream)
 
         elif self.tag == 'TAG_COMPATIBILITY':
             self.value = struct_parse(structs.Elf_uleb128('value'), stream)
             self.extra = struct_parse(structs.Elf_ntbs('vendor_name',
-                                                       encoding='ascii'),
+                                                       encoding='utf-8'),
                                       stream)
 
         elif self.tag == 'TAG_ALSO_COMPATIBLE_WITH':
diff --git a/elftools/elf/segments.py b/elftools/elf/segments.py
index c1c0279..16560bc 100644
--- a/elftools/elf/segments.py
+++ b/elftools/elf/segments.py
@@ -92,7 +92,7 @@ class InterpSegment(Segment):
         """
         path_offset = self['p_offset']
         return struct_parse(
-            CString('', encoding='ascii'),
+            CString('', encoding='utf-8'),
             self.stream,
             stream_pos=path_offset)
 
diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py
index 2862454..a89bfed 100644
--- a/elftools/elf/structs.py
+++ b/elftools/elf/structs.py
@@ -384,7 +384,7 @@ class ELFStructs(object):
         self.Elf_Attr_Subsection_Header = Struct('Elf_Attr_Subsection',
             self.Elf_word('length'),
             self.Elf_ntbs('vendor_name',
-                          encoding='ascii')
+                          encoding='utf-8')
         )
 
         # Structure of a build attribute tag.
diff --git a/test/test_encoding.py b/test/test_encoding.py
new file mode 100644
index 0000000..307a560
--- /dev/null
+++ b/test/test_encoding.py
@@ -0,0 +1,30 @@
+# coding: utf-8
+#-------------------------------------------------------------------------------
+# elftools tests
+#
+# Audrey Dutcher (audrey@rhelmot.io)
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+
+from __future__ import unicode_literals
+import unittest
+import os
+
+from elftools.elf.elffile import ELFFile
+
+class TestUnicodeSymbols(unittest.TestCase):
+    """Test that we can handle a unicode symbol as produced by clang"""
+
+    def test_delta(self):
+        fname = os.path.join('test', 'testfiles_for_unittests',
+            'unicode_symbols.elf')
+
+        with open(fname, 'rb') as f:
+            elf = ELFFile(f)
+            symtab = elf.get_section_by_name('.symtab')
+            list(symtab.iter_symbols()) # this used to just fail
+            self.assertEqual(len(symtab.get_symbol_by_name('Δ')), 1)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/testfiles_for_unittests/unicode_symbols.elf b/test/testfiles_for_unittests/unicode_symbols.elf
new file mode 100755
index 0000000..3872c62
Binary files /dev/null and b/test/testfiles_for_unittests/unicode_symbols.elf differ