4814c7f7998571e481784c10bec96ba7df14579f
[pyelftools.git] / elftools / elf / elffile.py
1 #-------------------------------------------------------------------------------
2 # elftools: elf/elffile.py
3 #
4 # ELFFile - main class for accessing ELF files
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from ..common.py3compat import BytesIO
10 from ..common.exceptions import ELFError
11 from ..common.utils import struct_parse, elf_assert
12 from ..construct import ConstructError
13 from .structs import ELFStructs
14 from .sections import (
15 Section, StringTableSection, SymbolTableSection, NullSection)
16 from .relocation import RelocationSection, RelocationHandler
17 from .segments import Segment, InterpSegment
18 from .enums import ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64
19 from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
20
21
class ELFFile(object):
    """ Creation: the constructor accepts a stream (file-like object) with the
        contents of an ELF file.

        Accessible attributes:

            stream:
                The stream holding the data of the file - must be a binary
                stream (bytes, not string).

            elfclass:
                32 or 64 - specifies the word size of the target machine

            little_endian:
                boolean - specifies the target machine's endianness

            structs:
                the ELFStructs instance created for this file's elfclass
                and endianness; used to parse all headers

            header:
                the complete ELF file header

            e_ident_raw:
                the raw e_ident field of the header
    """
    def __init__(self, stream):
        self.stream = stream
        # Must run first: it sets self.elfclass and self.little_endian,
        # which are needed to construct the structs below.
        self._identify_file()
        self.structs = ELFStructs(
            little_endian=self.little_endian,
            elfclass=self.elfclass)
        self.header = self._parse_elf_header()

        # Keep the first 16 bytes of the file around in unparsed form
        self.stream.seek(0)
        self.e_ident_raw = self.stream.read(16)

        self._file_stringtable_section = self._get_file_stringtable()
        # Built lazily on the first call to get_section_by_name
        self._section_name_map = None

    def num_sections(self):
        """ Number of sections in the file (the header's e_shnum)
        """
        return self['e_shnum']

    def get_section(self, n):
        """ Get the section at index #n from the file (Section object or a
            subclass)
        """
        section_header = self._get_section_header(n)
        return self._make_section(section_header)

    def get_section_by_name(self, name):
        """ Get a section from the file, by name. Return None if no such
            section exists.
        """
        # The first time this method is called, construct a name to number
        # mapping. If several sections share a name, the one with the
        # highest index is the one recorded.
        #
        if self._section_name_map is None:
            self._section_name_map = {}
            for i, sec in enumerate(self.iter_sections()):
                self._section_name_map[sec.name] = i
        secnum = self._section_name_map.get(name, None)
        return None if secnum is None else self.get_section(secnum)

    def iter_sections(self):
        """ Yield all the sections in the file
        """
        for i in range(self.num_sections()):
            yield self.get_section(i)

    def num_segments(self):
        """ Number of segments in the file (the header's e_phnum)
        """
        return self['e_phnum']

    def get_segment(self, n):
        """ Get the segment at index #n from the file (Segment object)
        """
        segment_header = self._get_segment_header(n)
        return self._make_segment(segment_header)

    def iter_segments(self):
        """ Yield all the segments in the file
        """
        for i in range(self.num_segments()):
            yield self.get_segment(i)

    def has_dwarf_info(self):
        """ Check whether this file appears to have debugging information.
            We assume that if it has the debug_info section, it has all the
            other required sections as well.
        """
        return bool(self.get_section_by_name(b'.debug_info'))

    def get_dwarf_info(self, relocate_dwarf_sections=True):
        """ Return a DWARFInfo object representing the debugging information in
            this file.

            If relocate_dwarf_sections is True, relocations for DWARF sections
            are looked up and applied.
        """
        # Expect that has_dwarf_info was called, so at least .debug_info is
        # present.
        # Sections that aren't found will be passed as None to DWARFInfo.
        #
        debug_sections = {}
        for secname in (b'.debug_info', b'.debug_abbrev', b'.debug_str',
                        b'.debug_line', b'.debug_frame', b'.debug_loc',
                        b'.debug_ranges'):
            section = self.get_section_by_name(secname)
            if section is None:
                debug_sections[secname] = None
            else:
                debug_sections[secname] = self._read_dwarf_section(
                    section,
                    relocate_dwarf_sections)

        return DWARFInfo(
            config=DwarfConfig(
                little_endian=self.little_endian,
                # Floor division: the address size is a byte count and has
                # to stay an integer. With '/', Python 3 would produce a
                # float (4.0 or 8.0) here.
                default_address_size=self.elfclass // 8,
                machine_arch=self.get_machine_arch()),
            debug_info_sec=debug_sections[b'.debug_info'],
            debug_abbrev_sec=debug_sections[b'.debug_abbrev'],
            debug_frame_sec=debug_sections[b'.debug_frame'],
            debug_str_sec=debug_sections[b'.debug_str'],
            debug_loc_sec=debug_sections[b'.debug_loc'],
            debug_ranges_sec=debug_sections[b'.debug_ranges'],
            debug_line_sec=debug_sections[b'.debug_line'])

    def get_machine_arch(self):
        """ Return the machine architecture, as detected from the ELF header.
            Not all architectures are supported at the moment.

            The result is one of 'x64', 'x86', 'ARM' or '<unknown>'.
        """
        if self['e_machine'] == 'EM_X86_64':
            return 'x64'
        elif self['e_machine'] in ('EM_386', 'EM_486'):
            return 'x86'
        elif self['e_machine'] == 'EM_ARM':
            return 'ARM'
        else:
            return '<unknown>'

    #-------------------------------- PRIVATE --------------------------------#

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _identify_file(self):
        """ Verify the ELF file and identify its class and endianness.

            Sets self.elfclass and self.little_endian. Raises ELFError if
            the EI_CLASS or EI_DATA byte holds an unrecognized value.
        """
        # Note: this code reads the stream directly, without using ELFStructs,
        # since we don't yet know its exact format. ELF was designed to be
        # read like this - its e_ident field is word-size and endian agnostic.
        #
        self.stream.seek(0)
        magic = self.stream.read(4)
        elf_assert(magic == b'\x7fELF', 'Magic number does not match')

        # The byte right after the magic is EI_CLASS
        ei_class = self.stream.read(1)
        if ei_class == b'\x01':
            self.elfclass = 32
        elif ei_class == b'\x02':
            self.elfclass = 64
        else:
            raise ELFError('Invalid EI_CLASS %s' % repr(ei_class))

        # ... followed by EI_DATA
        ei_data = self.stream.read(1)
        if ei_data == b'\x01':
            self.little_endian = True
        elif ei_data == b'\x02':
            self.little_endian = False
        else:
            raise ELFError('Invalid EI_DATA %s' % repr(ei_data))

    def _section_offset(self, n):
        """ Compute the offset of section #n in the file
        """
        return self['e_shoff'] + n * self['e_shentsize']

    def _segment_offset(self, n):
        """ Compute the offset of segment #n in the file
        """
        return self['e_phoff'] + n * self['e_phentsize']

    def _make_segment(self, segment_header):
        """ Create a Segment object of the appropriate type
        """
        segtype = segment_header['p_type']
        if segtype == 'PT_INTERP':
            return InterpSegment(segment_header, self.stream)
        else:
            return Segment(segment_header, self.stream)

    def _get_section_header(self, n):
        """ Find the header of section #n, parse it and return the struct
        """
        return struct_parse(
            self.structs.Elf_Shdr,
            self.stream,
            stream_pos=self._section_offset(n))

    def _get_section_name(self, section_header):
        """ Given a section header, find this section's name in the file's
            string table
        """
        name_offset = section_header['sh_name']
        return self._file_stringtable_section.get_string(name_offset)

    def _make_section(self, section_header):
        """ Create a section object of the appropriate type
        """
        name = self._get_section_name(section_header)
        sectype = section_header['sh_type']

        if sectype == 'SHT_STRTAB':
            return StringTableSection(section_header, name, self.stream)
        elif sectype == 'SHT_NULL':
            return NullSection(section_header, name, self.stream)
        elif sectype in ('SHT_SYMTAB', 'SHT_DYNSYM'):
            return self._make_symbol_table_section(section_header, name)
        elif sectype in ('SHT_REL', 'SHT_RELA'):
            return RelocationSection(
                section_header, name, self.stream, self)
        else:
            return Section(section_header, name, self.stream)

    def _make_symbol_table_section(self, section_header, name):
        """ Create a SymbolTableSection
        """
        # sh_link of a symbol table is used here as the index of the
        # string table section passed along with it
        linked_strtab_index = section_header['sh_link']
        strtab_section = self.get_section(linked_strtab_index)
        return SymbolTableSection(
            section_header, name, self.stream,
            elffile=self,
            stringtable=strtab_section)

    def _get_segment_header(self, n):
        """ Find the header of segment #n, parse it and return the struct
        """
        return struct_parse(
            self.structs.Elf_Phdr,
            self.stream,
            stream_pos=self._segment_offset(n))

    def _get_file_stringtable(self):
        """ Find the file's string table section
        """
        stringtable_section_num = self['e_shstrndx']
        # The section is built directly, with an empty name, rather than
        # through _make_section: looking up a section's name requires the
        # very string table being created here.
        return StringTableSection(
            header=self._get_section_header(stringtable_section_num),
            name='',
            stream=self.stream)

    def _parse_elf_header(self):
        """ Parse the ELF file header from the start of the stream and
            return the resulting struct.
        """
        return struct_parse(self.structs.Elf_Ehdr, self.stream, stream_pos=0)

    def _read_dwarf_section(self, section, relocate_dwarf_sections):
        """ Read the contents of a DWARF section from the stream and return a
            DebugSectionDescriptor. Apply relocations if asked to.
        """
        self.stream.seek(section['sh_offset'])
        # The section data is read into a new stream, for processing
        section_stream = BytesIO()
        section_stream.write(self.stream.read(section['sh_size']))

        if relocate_dwarf_sections:
            reloc_handler = RelocationHandler(self)
            reloc_section = reloc_handler.find_relocations_for_section(section)
            if reloc_section is not None:
                reloc_handler.apply_section_relocations(
                    section_stream, reloc_section)

        return DebugSectionDescriptor(
            stream=section_stream,
            name=section.name,
            global_offset=section['sh_offset'],
            size=section['sh_size'])

303
304