Convert all ascii decoding to utf-8 decoding (#182)
author: Audrey Dutcher <audrey@rhelmot.io>
Fri, 23 Feb 2018 13:28:51 +0000 (05:28 -0800)
committer: Eli Bendersky <eliben@users.noreply.github.com>
Fri, 23 Feb 2018 13:28:51 +0000 (05:28 -0800)
* Convert all ascii decoding to utf-8 decoding

* Add testcase for unicode symbols

elftools/elf/sections.py
elftools/elf/segments.py
elftools/elf/structs.py
test/test_encoding.py [new file with mode: 0644]
test/testfiles_for_unittests/unicode_symbols.elf [new file with mode: 0755]

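diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py
index c2d92208d87980984743b87163844473af49b4fb..20e90562e60e6e20a7ab1e27df17df01799ac2bb 100644 (file)
--- a/elftools/elf/sections.py
+++ b/elftools/elf/sections.py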
@@ -137,7 +137,7 @@ class StringTableSection(Section):
         """
         table_offset = self['sh_offset']
         s = parse_cstring_from_stream(self.stream, table_offset + offset)
-        return s.decode('ascii') if s else ''
+        return s.decode('utf-8') if s else ''
 
 
 class SymbolTableSection(Section):
@@ -299,13 +299,13 @@ class ARMAttribute(object):
 
         elif self.tag in ('TAG_CPU_RAW_NAME', 'TAG_CPU_NAME', 'TAG_CONFORMANCE'):
             self.value = struct_parse(structs.Elf_ntbs('value',
-                                                       encoding='ascii'),
+                                                       encoding='utf-8'),
                                       stream)
 
         elif self.tag == 'TAG_COMPATIBILITY':
             self.value = struct_parse(structs.Elf_uleb128('value'), stream)
             self.extra = struct_parse(structs.Elf_ntbs('vendor_name',
-                                                       encoding='ascii'),
+                                                       encoding='utf-8'),
                                       stream)
 
         elif self.tag == 'TAG_ALSO_COMPATIBLE_WITH':
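diff --git a/elftools/elf/segments.py b/elftools/elf/segments.py
index c1c02793150ab19ae69f0e1f7f05f7f9e444bdbd..16560bcd8152462214f892d055661f33617d4cb1 100644 (file)
--- a/elftools/elf/segments.py
+++ b/elftools/elf/segments.py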
@@ -92,7 +92,7 @@ class InterpSegment(Segment):
         """
         path_offset = self['p_offset']
         return struct_parse(
-            CString('', encoding='ascii'),
+            CString('', encoding='utf-8'),
             self.stream,
             stream_pos=path_offset)
 
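diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py
index 28624547d5f065d284d79b8ee8074f1a479dcd47..a89bfed1f554cb2a974574d1d60982a5a3b9b13c 100644 (file)
--- a/elftools/elf/structs.py
+++ b/elftools/elf/structs.py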
@@ -384,7 +384,7 @@ class ELFStructs(object):
         self.Elf_Attr_Subsection_Header = Struct('Elf_Attr_Subsection',
                                                  self.Elf_word('length'),
                                                  self.Elf_ntbs('vendor_name',
-                                                               encoding='ascii')
+                                                               encoding='utf-8')
         )
 
         # Structure of a build attribute tag.
diff --git a/test/test_encoding.py b/test/test_encoding.py
new file mode 100644 (file)
index 0000000..307a560
--- /dev/null
+++ b/test/test_encoding.py
@@ -0,0 +1,30 @@
+# coding: utf-8
+#-------------------------------------------------------------------------------
+# elftools tests
+#
+# Audrey Dutcher (audrey@rhelmot.io)
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+
+from __future__ import unicode_literals
+import unittest
+import os
+
+from elftools.elf.elffile import ELFFile
+
+class TestUnicodeSymbols(unittest.TestCase):
+    """Test that we can handle a unicode symbol as produced by clang"""
+
+    def test_delta(self):
+        fname = os.path.join('test', 'testfiles_for_unittests',
+                'unicode_symbols.elf')
+
+        with open(fname, 'rb') as f:
+            elf = ELFFile(f)
+            symtab = elf.get_section_by_name('.symtab')
+            list(symtab.iter_symbols()) # this used to just fail
+            self.assertEqual(len(symtab.get_symbol_by_name('Δ')), 1)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/testfiles_for_unittests/unicode_symbols.elf b/test/testfiles_for_unittests/unicode_symbols.elf
new file mode 100755 (executable)
index 0000000..3872c62
Binary files /dev/null and b/test/testfiles_for_unittests/unicode_symbols.elf differ