added support for version definition, version dependency and version symbol sections...
[pyelftools.git] / elftools / elf / elffile.py
1 #-------------------------------------------------------------------------------
2 # elftools: elf/elffile.py
3 #
4 # ELFFile - main class for accessing ELF files
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from ..common.py3compat import BytesIO
10 from ..common.exceptions import ELFError
11 from ..common.utils import struct_parse, elf_assert
12 from ..construct import ConstructError
13 from .structs import ELFStructs
14 from .sections import (
15 Section, StringTableSection, SymbolTableSection,
16 SUNWSyminfoTableSection, VerneedTableSection,
17 VerdefTableSection, VersymTableSection,
18 NullSection)
19 from .dynamic import DynamicSection, DynamicSegment
20 from .relocation import RelocationSection, RelocationHandler
21 from .segments import Segment, InterpSegment
22 from .enums import ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64
23 from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
24
25
class ELFFile(object):
    """ Creation: the constructor accepts a stream (file-like object) with the
        contents of an ELF file.

        Accessible attributes:

            stream:
                The stream holding the data of the file - must be a binary
                stream (bytes, not string).

            elfclass:
                32 or 64 - specifies the word size of the target machine

            little_endian:
                boolean - specifies the target machine's endianness

            header:
                the complete ELF file header

            e_ident_raw:
                the raw e_ident field of the header
    """
    def __init__(self, stream):
        self.stream = stream
        # Class and endianness must be known before the structs used to parse
        # everything else can be built.
        self._identify_file()
        self.structs = ELFStructs(
            little_endian=self.little_endian,
            elfclass=self.elfclass)
        self.header = self._parse_elf_header()

        self.stream.seek(0)
        self.e_ident_raw = self.stream.read(16)

        self._file_stringtable_section = self._get_file_stringtable()
        # Name -> section index mapping, built lazily by get_section_by_name
        self._section_name_map = None

    def num_sections(self):
        """ Number of sections in the file
        """
        return self['e_shnum']

    def get_section(self, n):
        """ Get the section at index #n from the file (Section object or a
            subclass)
        """
        section_header = self._get_section_header(n)
        return self._make_section(section_header)

    def get_section_by_name(self, name):
        """ Get a section from the file, by name. Return None if no such
            section exists.
        """
        # The first time this method is called, construct a name to number
        # mapping. If several sections share a name, the last one wins.
        #
        if self._section_name_map is None:
            self._section_name_map = {}
            for i, sec in enumerate(self.iter_sections()):
                self._section_name_map[sec.name] = i
        secnum = self._section_name_map.get(name, None)
        return None if secnum is None else self.get_section(secnum)

    def iter_sections(self):
        """ Yield all the sections in the file
        """
        for i in range(self.num_sections()):
            yield self.get_section(i)

    def num_segments(self):
        """ Number of segments in the file
        """
        return self['e_phnum']

    def get_segment(self, n):
        """ Get the segment at index #n from the file (Segment object)
        """
        segment_header = self._get_segment_header(n)
        return self._make_segment(segment_header)

    def iter_segments(self):
        """ Yield all the segments in the file
        """
        for i in range(self.num_segments()):
            yield self.get_segment(i)

    def has_dwarf_info(self):
        """ Check whether this file appears to have debugging information.
            We assume that if it has the debug_info section, it has all the
            other required sections as well.
        """
        return bool(self.get_section_by_name(b'.debug_info'))

    def get_dwarf_info(self, relocate_dwarf_sections=True):
        """ Return a DWARFInfo object representing the debugging information in
            this file.

            If relocate_dwarf_sections is True, relocations for DWARF sections
            are looked up and applied.
        """
        # Expect that has_dwarf_info was called, so at least .debug_info is
        # present.
        # Sections that aren't found will be passed as None to DWARFInfo.
        #
        debug_sections = {}
        for secname in (b'.debug_info', b'.debug_abbrev', b'.debug_str',
                        b'.debug_line', b'.debug_frame',
                        b'.debug_loc', b'.debug_ranges'):
            section = self.get_section_by_name(secname)
            if section is None:
                debug_sections[secname] = None
            else:
                debug_sections[secname] = self._read_dwarf_section(
                    section,
                    relocate_dwarf_sections)

        return DWARFInfo(
                config=DwarfConfig(
                    little_endian=self.little_endian,
                    # Floor division: the address size is a whole number of
                    # bytes. A plain '/' would produce a float on Python 3.
                    default_address_size=self.elfclass // 8,
                    machine_arch=self.get_machine_arch()),
                debug_info_sec=debug_sections[b'.debug_info'],
                debug_abbrev_sec=debug_sections[b'.debug_abbrev'],
                debug_frame_sec=debug_sections[b'.debug_frame'],
                # TODO(eliben): reading of eh_frame is not hooked up yet
                eh_frame_sec=None,
                debug_str_sec=debug_sections[b'.debug_str'],
                debug_loc_sec=debug_sections[b'.debug_loc'],
                debug_ranges_sec=debug_sections[b'.debug_ranges'],
                debug_line_sec=debug_sections[b'.debug_line'])

    def get_machine_arch(self):
        """ Return the machine architecture, as detected from the ELF header.
            Not all architectures are supported at the moment; '<unknown>' is
            returned for anything that is not recognized.
        """
        if self['e_machine'] == 'EM_X86_64':
            return 'x64'
        elif self['e_machine'] in ('EM_386', 'EM_486'):
            return 'x86'
        elif self['e_machine'] == 'EM_ARM':
            return 'ARM'
        else:
            return '<unknown>'

    #-------------------------------- PRIVATE --------------------------------#

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _identify_file(self):
        """ Verify the ELF file and identify its class and endianness.
            Sets self.elfclass and self.little_endian; raises ELFError on an
            unrecognized EI_CLASS or EI_DATA byte.
        """
        # Note: this code reads the stream directly, without using ELFStructs,
        # since we don't yet know its exact format. ELF was designed to be
        # read like this - its e_ident field is word-size and endian agnostic.
        #
        self.stream.seek(0)
        magic = self.stream.read(4)
        elf_assert(magic == b'\x7fELF', 'Magic number does not match')

        ei_class = self.stream.read(1)
        if ei_class == b'\x01':
            self.elfclass = 32
        elif ei_class == b'\x02':
            self.elfclass = 64
        else:
            raise ELFError('Invalid EI_CLASS %s' % repr(ei_class))

        ei_data = self.stream.read(1)
        if ei_data == b'\x01':
            self.little_endian = True
        elif ei_data == b'\x02':
            self.little_endian = False
        else:
            raise ELFError('Invalid EI_DATA %s' % repr(ei_data))

    def _section_offset(self, n):
        """ Compute the offset of section #n in the file
        """
        return self['e_shoff'] + n * self['e_shentsize']

    def _segment_offset(self, n):
        """ Compute the offset of segment #n in the file
        """
        return self['e_phoff'] + n * self['e_phentsize']

    def _make_segment(self, segment_header):
        """ Create a Segment object of the appropriate type
        """
        segtype = segment_header['p_type']
        if segtype == 'PT_INTERP':
            return InterpSegment(segment_header, self.stream)
        elif segtype == 'PT_DYNAMIC':
            return DynamicSegment(segment_header, self.stream, self)
        else:
            return Segment(segment_header, self.stream)

    def _get_section_header(self, n):
        """ Find the header of section #n, parse it and return the struct
        """
        return struct_parse(
            self.structs.Elf_Shdr,
            self.stream,
            stream_pos=self._section_offset(n))

    def _get_section_name(self, section_header):
        """ Given a section header, find this section's name in the file's
            string table
        """
        name_offset = section_header['sh_name']
        return self._file_stringtable_section.get_string(name_offset)

    def _make_section(self, section_header):
        """ Create a section object of the appropriate type, chosen by the
            header's sh_type. Types without a dedicated class fall back to a
            plain Section.
        """
        name = self._get_section_name(section_header)
        sectype = section_header['sh_type']

        if sectype == 'SHT_STRTAB':
            return StringTableSection(section_header, name, self.stream)
        elif sectype == 'SHT_NULL':
            return NullSection(section_header, name, self.stream)
        elif sectype in ('SHT_SYMTAB', 'SHT_DYNSYM'):
            return self._make_symbol_table_section(section_header, name)
        elif sectype == 'SHT_SUNW_syminfo':
            return self._make_sunwsyminfo_table_section(section_header, name)
        elif sectype == 'SHT_GNU_verneed':
            return self._make_verneed_table_section(section_header, name)
        elif sectype == 'SHT_GNU_verdef':
            return self._make_verdef_table_section(section_header, name)
        elif sectype == 'SHT_GNU_versym':
            return self._make_versym_table_section(section_header, name)
        elif sectype in ('SHT_REL', 'SHT_RELA'):
            return RelocationSection(
                section_header, name, self.stream, self)
        elif sectype == 'SHT_DYNAMIC':
            return DynamicSection(section_header, name, self.stream, self)
        else:
            return Section(section_header, name, self.stream)

    def _get_linked_section(self, section_header):
        """ Return the section whose index is stored in the sh_link field of
            the given section header.
        """
        return self.get_section(section_header['sh_link'])

    def _make_symbol_table_section(self, section_header, name):
        """ Create a SymbolTableSection; the sh_link section is passed as its
            string table.
        """
        return SymbolTableSection(
            section_header, name, self.stream,
            elffile=self,
            stringtable=self._get_linked_section(section_header))

    def _make_sunwsyminfo_table_section(self, section_header, name):
        """ Create a SUNWSyminfoTableSection; the sh_link section is passed as
            its symbol table.
        """
        return SUNWSyminfoTableSection(
            section_header, name, self.stream,
            elffile=self,
            symboltable=self._get_linked_section(section_header))

    def _make_verneed_table_section(self, section_header, name):
        """ Create a VerneedTableSection; the sh_link section is passed as its
            string table.
        """
        return VerneedTableSection(
            section_header, name, self.stream,
            elffile=self,
            stringtable=self._get_linked_section(section_header))

    def _make_verdef_table_section(self, section_header, name):
        """ Create a VerdefTableSection; the sh_link section is passed as its
            string table.
        """
        return VerdefTableSection(
            section_header, name, self.stream,
            elffile=self,
            stringtable=self._get_linked_section(section_header))

    def _make_versym_table_section(self, section_header, name):
        """ Create a VersymTableSection; the sh_link section is passed as its
            symbol table.
        """
        return VersymTableSection(
            section_header, name, self.stream,
            elffile=self,
            symboltable=self._get_linked_section(section_header))

    def _get_segment_header(self, n):
        """ Find the header of segment #n, parse it and return the struct
        """
        return struct_parse(
            self.structs.Elf_Phdr,
            self.stream,
            stream_pos=self._segment_offset(n))

    def _get_file_stringtable(self):
        """ Find the file's string table section (the one indexed by the
            e_shstrndx header field).
        """
        stringtable_section_num = self['e_shstrndx']
        # The name is left empty: section names are resolved through this very
        # table, which is still being constructed at this point.
        return StringTableSection(
                header=self._get_section_header(stringtable_section_num),
                name='',
                stream=self.stream)

    def _parse_elf_header(self):
        """ Parse the ELF file header from the start of the stream and return
            the resulting struct.
        """
        return struct_parse(self.structs.Elf_Ehdr, self.stream, stream_pos=0)

    def _read_dwarf_section(self, section, relocate_dwarf_sections):
        """ Read the contents of a DWARF section from the stream and return a
            DebugSectionDescriptor. Apply relocations if asked to.
        """
        self.stream.seek(section['sh_offset'])
        # The section data is read into a new stream, for processing
        section_stream = BytesIO()
        section_stream.write(self.stream.read(section['sh_size']))

        if relocate_dwarf_sections:
            reloc_handler = RelocationHandler(self)
            reloc_section = reloc_handler.find_relocations_for_section(section)
            if reloc_section is not None:
                # Relocations are applied to the private copy only; the
                # original file stream is not written to here.
                reloc_handler.apply_section_relocations(
                        section_stream, reloc_section)

        return DebugSectionDescriptor(
                stream=section_stream,
                name=section.name,
                global_offset=section['sh_offset'],
                size=section['sh_size'])
361
362