reduce mmap BLOCK_SIZE to 1 << 28 so it works on armv7a
[openpower-isa.git] / src / openpower / decoder / isa / mem.py
# SPDX-License-Identifier: LGPLv3+
# Copyright (C) 2020, 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
# Funded by NLnet http://nlnet.nl
"""core of the python-based POWER9 simulator

this is part of a cycle-accurate POWER9 simulator. its primary purpose is
not speed: it is both a learning and educational tool and a method of
verifying the HDL.

related bugs:

* https://bugs.libre-soc.org/show_bug.cgi?id=424
"""

from collections import defaultdict
from openpower.decoder.selectable_int import SelectableInt
from openpower.util import log, LogKind
import math
import enum
from cached_property import cached_property
import mmap
from pickle import PicklingError
import ctypes


def swap_order(x, nbytes):
    x = x.to_bytes(nbytes, byteorder='little')
    x = int.from_bytes(x, byteorder='big', signed=False)
    return x
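
# A quick illustrative sketch of swap_order (not part of the original file):
# it reverses the byte order of an nbytes-wide integer, so a value
# round-trips when stored and loaded with opposite endianness:
#
#   assert swap_order(0x0102, 2) == 0x0201
#   assert swap_order(0x00C0FFEE, 4) == 0xEEFFC000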


class MemException(Exception):
    pass


def process_mem(initial_mem, row_bytes=8):
    res = {}
    # different types of memory data structures recognised (for convenience)
    if isinstance(initial_mem, list):
        initial_mem = (0, initial_mem)
    if isinstance(initial_mem, tuple):
        startaddr, mem = initial_mem
        initial_mem = {}
        for i, val in enumerate(mem):
            initial_mem[startaddr + row_bytes*i] = (val, row_bytes)

    for addr, val in initial_mem.items():
        if isinstance(val, tuple):
            (val, width) = val
        else:
            width = row_bytes  # assume same width
        # val = swap_order(val, width)
        res[addr] = (val, width)

    return res
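
# Worked examples of the three accepted input formats (illustrative only,
# these calls are not in the original file):
#
#   # list: rows of row_bytes each, starting at address 0
#   assert process_mem([0x11, 0x22]) == {0: (0x11, 8), 8: (0x22, 8)}
#   # tuple: (start address, list of rows)
#   assert process_mem((0x100, [0x11])) == {0x100: (0x11, 8)}
#   # dict: address -> value or (value, width)
#   assert process_mem({0x1000: (0xdeadbeef, 4)}) == {0x1000: (0xdeadbeef, 4)}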


@enum.unique
class _ReadReason(enum.Enum):
    Read = enum.auto()
    SubWordWrite = enum.auto()
    Dump = enum.auto()
    Execute = enum.auto()

    @cached_property
    def read_default(self):
        if self in (self.SubWordWrite, self.Dump):
            return 0
        return None

    @cached_property
    def needed_mmap_page_flag(self):
        if self is self.Execute:
            return _MMapPageFlags.X
        return _MMapPageFlags.R


class MemCommon:
    def __init__(self, row_bytes, initial_mem, misaligned_ok):
        self.bytes_per_word = row_bytes
        self.word_log2 = math.ceil(math.log2(row_bytes))
        self.last_ld_addr = None
        self.last_st_addr = None
        self.misaligned_ok = misaligned_ok
        log("Sim-Mem", initial_mem, self.bytes_per_word, self.word_log2)
        if not initial_mem:
            return

        self.initialize(row_bytes, initial_mem)

    def initialize(self, row_bytes, initial_mem):
        for addr, (val, width) in process_mem(initial_mem, row_bytes).items():
            # val = swap_order(val, width)
            self.st(addr, val, width, swap=False)

    def _read_word(self, word_idx, reason):
        raise NotImplementedError

    def _write_word(self, word_idx, value):
        raise NotImplementedError

    def word_idxs(self):
        raise NotImplementedError
        # unreachable yield: makes this abstract method a generator, so
        # callers can rely on the override being iterable
        yield 0

    def _get_shifter_mask(self, wid, remainder):
        # big-endian shifter (dead code: immediately overridden below)
        shifter = ((self.bytes_per_word - wid) - remainder) * \
            8  # bits per byte
        # XXX https://bugs.libre-soc.org/show_bug.cgi?id=377
        # BE/LE mode?
        shifter = remainder * 8  # little-endian byte-lane offset, in bits
        mask = (1 << (wid * 8)) - 1
        log("width,rem,shift,mask", wid, remainder, hex(shifter), hex(mask))
        return shifter, mask
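
    # Illustrative values (a sketch, not from the original file): for a
    # 2-byte access at byte-offset 4 within an 8-byte word, the byte lane
    # starts 32 bits up and the mask covers 16 bits:
    #
    #   shifter, mask = mem._get_shifter_mask(wid=2, remainder=4)
    #   assert (shifter, mask) == (32, 0xFFFF)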

    # TODO: Implement ld/st of lesser width
    def ld(self, address, width=8, swap=True, check_in_mem=False,
           instr_fetch=False, reason=None):
        log("ld from addr 0x%x width %d" % (address, width),
            swap, check_in_mem, instr_fetch)
        self.last_ld_addr = address  # record last load
        ldaddr = address
        remainder = address & (self.bytes_per_word - 1)
        address = address >> self.word_log2
        if remainder & (width - 1) != 0:
            exc = MemException("unaligned",
                               "Unaligned access: remainder %x width %d" %
                               (remainder, width))
            exc.dar = ldaddr
            raise exc
        if reason is None:
            reason = _ReadReason.Execute if instr_fetch else _ReadReason.Read
        val = self._read_word(address, reason)
        if val is None:
            if check_in_mem:
                return None
            else:
                val = 0
        log("ld mem @ 0x%x rem %d : 0x%x" % (ldaddr, remainder, val))

        if width != self.bytes_per_word:
            shifter, mask = self._get_shifter_mask(width, remainder)
            log("masking", hex(val), hex(mask << shifter), shifter)
            val = val & (mask << shifter)
            val >>= shifter
        if swap:
            val = swap_order(val, width)
        log("Read 0x%x from addr 0x%x" % (val, ldaddr))
        return val
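
    # Sub-word loads in a sketch (using the dict-backed Mem subclass defined
    # below; not part of the original file). the LSB of a stored word lives
    # at the lowest byte address:
    #
    #   m = Mem()
    #   m.st(0x0, 0x0123456789abcdef, width=8, swap=False)
    #   assert m.ld(0x0, width=1, swap=False) == 0xef
    #   assert m.ld(0x7, width=1, swap=False) == 0x01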

    def _st(self, addr, v, width=8, swap=True):
        staddr = addr
        remainder = addr & (self.bytes_per_word - 1)
        addr = addr >> self.word_log2
        log("Writing 0x%x to ST 0x%x memaddr 0x%x/%x swap %s" %
            (v, staddr, addr, remainder, str(swap)))
        if not self.misaligned_ok and remainder & (width - 1) != 0:
            exc = MemException("unaligned",
                               "Unaligned access: remainder %x width %d" %
                               (remainder, width))
            exc.dar = staddr
            raise exc
        if swap:
            v = swap_order(v, width)
        if width != self.bytes_per_word:
            val = self._read_word(addr, _ReadReason.SubWordWrite)
            shifter, mask = self._get_shifter_mask(width, remainder)
            val &= ~(mask << shifter)
            val |= v << shifter
            self._write_word(addr, val)
        else:
            val = v
            self._write_word(addr, v)
        log("mem @ 0x%x: 0x%x" % (staddr, val))

    def st(self, st_addr, v, width=8, swap=True):
        self.last_st_addr = st_addr  # record last store
        # misaligned not allowed: pass straight to Mem._st
        if not self.misaligned_ok:
            return self._st(st_addr, v, width, swap)
        remainder = st_addr & (self.bytes_per_word - 1)
        if swap:
            v = swap_order(v, width)
        # not misaligned: pass through to Mem._st but we've swapped already
        misaligned = remainder & (width - 1)
        if misaligned == 0 or (remainder + width <= self.bytes_per_word):
            return self._st(st_addr, v, width, swap=False)
        shifter, mask = self._get_shifter_mask(width, remainder)
        # split into two halves. lower first
        maxmask = (1 << (self.bytes_per_word * 8)) - 1
        val1 = ((v << shifter) & maxmask) >> shifter
        self._st(st_addr, val1, width=width-misaligned, swap=False)
        # now upper. the remaining (misaligned) bytes go at the start of
        # the next word
        val2 = v >> ((width-misaligned)*8)
        addr2 = (st_addr >> self.word_log2) << self.word_log2
        addr2 += self.bytes_per_word
        log("v, val2", hex(v), hex(val2), "ad", addr2)
        self._st(addr2, val2, width=misaligned, swap=False)
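
    # A worked example of the split (a sketch, not from the original file):
    # a 4-byte store at address 6 crosses the 8-byte word boundary, so two
    # bytes land at the top of word 0 and two at the bottom of word 1:
    #
    #   m = Mem(misaligned_ok=True)
    #   m.st(0x6, 0x11223344, width=4, swap=False)
    #   assert m.ld(0x6, width=2, swap=False) == 0x3344   # low half, word 0
    #   assert m.ld(0x8, width=2, swap=False) == 0x1122   # high half, word 1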

    def __call__(self, addr, sz):
        val = self.ld(addr.value, sz, swap=False)
        log("memread", addr, sz, val)
        return SelectableInt(val, sz*8)

    def memassign(self, addr, sz, val):
        log("memassign", addr, sz, val)
        self.st(addr.value, val.value, sz, swap=False)

    def dump(self, printout=True, asciidump=False):
        keys = list(self.word_idxs())
        keys.sort()
        res = []
        for k in keys:
            v = self._read_word(k, _ReadReason.Dump)
            res.append((k*8, v))
            if not printout:
                continue
            s = ""
            if asciidump:
                for i in range(8):
                    c = chr(v >> (i*8) & 0xff)
                    if not c.isprintable():
                        c = "."
                    s += c
            print("%016x: %016x" % ((k*8) & 0xffffffffffffffff, v), s)
        return res

    def log_fancy(self, *, kind=LogKind.Default, name="Memory",
                  log2_line_size=4, log2_column_chunk_size=3, log=log):
        line_size = 1 << log2_line_size
        subline_mask = line_size - 1
        column_chunk_size = 1 << log2_column_chunk_size

        def make_line():
            return bytearray(line_size)
        mem_lines = defaultdict(make_line)
        subword_range = range(1 << self.word_log2)
        for k in self.word_idxs():
            addr = k << self.word_log2
            for _ in subword_range:
                v = self.ld(addr, width=1, reason=_ReadReason.Dump)
                mem_lines[addr >> log2_line_size][addr & subline_mask] = v
                addr += 1

        lines = []
        last_line_index = None
        for line_index in sorted(mem_lines.keys()):
            line_addr = line_index << log2_line_size
            if last_line_index is not None \
                    and last_line_index + 1 != line_index:
                lines.append("*")
            last_line_index = line_index
            line_bytes = mem_lines[line_index]
            line_str = f"0x{line_addr:08X}:"
            for col_chunk in range(0, line_size, column_chunk_size):
                line_str += " "
                for i in range(column_chunk_size):
                    line_str += f" {line_bytes[col_chunk + i]:02X}"
            line_str += " |"
            for i in range(line_size):
                if 0x20 <= line_bytes[i] <= 0x7E:
                    line_str += chr(line_bytes[i])
                else:
                    line_str += "."
            line_str += "|"
            lines.append(line_str)
        lines = "\n".join(lines)
        log(f"\n{name}:\n{lines}\n", kind=kind)


class Mem(MemCommon):
    def __init__(self, row_bytes=8, initial_mem=None, misaligned_ok=False):
        self.mem = {}
        super().__init__(row_bytes, initial_mem, misaligned_ok)

    def _read_word(self, word_idx, reason):
        return self.mem.get(word_idx, reason.read_default)

    def _write_word(self, word_idx, value):
        self.mem[word_idx] = value

    def word_idxs(self):
        return self.mem.keys()
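
# Minimal usage sketch for the dict-backed Mem (illustrative, not part of
# the original file):
#
#   m = Mem(row_bytes=8)
#   m.st(0x100, 0x1234, width=2, swap=False)
#   assert m.ld(0x100, width=2, swap=False) == 0x1234
#   m.dump()    # hex dump: "0000000000000100: 0000000000001234"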


class _MMapPageFlags(enum.IntFlag):
    """ flags on each mmap-ped page

    Note: these are *not* PowerISA MMU pages, but instead internal to Mem so
    it can detect invalid accesses and assert rather than segfaulting.
    """
    R = 1
    W = 2
    X = 4
    "readable when instr_fetch=True"

    RWX = R | W | X


_MMAP_PAGE_SIZE = 1 << 16  # size of chunk that we track
_PAGE_COUNT = (1 << 48) // _MMAP_PAGE_SIZE  # 48-bit address space
_NEG_PG_IDX_START = _PAGE_COUNT // 2  # start of negative half of address space

# code assumes BLOCK_SIZE is a power of two
# BLOCK_SIZE = 1 << 32
BLOCK_SIZE = 1 << 28  # reduced so it works on armv7a

assert BLOCK_SIZE % _MMAP_PAGE_SIZE == 0
DEFAULT_BLOCK_ADDRS = (
    0,  # low end of user space
    2 ** 47 - BLOCK_SIZE,  # high end of user space
)
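
# Sanity-check of the address-space arithmetic above (values follow
# directly from the constants; shown purely for illustration):
#
#   assert BLOCK_SIZE == 0x1000_0000          # 256 MiB per block
#   assert _PAGE_COUNT == 1 << 32             # 2**48 / 2**16 pages
#   assert DEFAULT_BLOCK_ADDRS[1] == 0x7FFF_F000_0000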


class MemMMap(MemCommon):
    def __init__(self, row_bytes=8, initial_mem=None, misaligned_ok=False,
                 block_addrs=DEFAULT_BLOCK_ADDRS, emulating_mmap=False):
        # we can't allocate the entire 2 ** 47 byte address space, so split
        # it into smaller blocks
        self.mem_blocks = {
            addr: mmap.mmap(-1, BLOCK_SIZE) for addr in block_addrs}
        assert all(addr % BLOCK_SIZE == 0 for addr in self.mem_blocks), \
            "misaligned block address not supported"
        self.page_flags = {}
        self.modified_pages = set()
        if not emulating_mmap:
            # mark blocks as readable/writable
            for addr, block in self.mem_blocks.items():
                start_page_idx = addr // _MMAP_PAGE_SIZE
                end_page_idx = start_page_idx + len(block) // _MMAP_PAGE_SIZE
                for page_idx in range(start_page_idx, end_page_idx):
                    self.page_flags[page_idx] = _MMapPageFlags.RWX

        super().__init__(row_bytes, initial_mem, misaligned_ok)

    def mmap_page_idx_to_addr(self, page_idx):
        assert 0 <= page_idx < _PAGE_COUNT
        if page_idx >= _NEG_PG_IDX_START:
            page_idx -= _PAGE_COUNT
        return (page_idx * _MMAP_PAGE_SIZE) % 2 ** 64

    def addr_to_mmap_page_idx(self, addr):
        page_idx, offset = divmod(addr, _MMAP_PAGE_SIZE)
        page_idx %= _PAGE_COUNT
        expected = self.mmap_page_idx_to_addr(page_idx) + offset
        if addr != expected:
            exc = MemException("not sign extended",
                               f"address not sign extended: 0x{addr:X} "
                               f"expected 0x{expected:X}")
            exc.dar = addr
            raise exc
        return page_idx
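
    # The page-index mapping folds the 64-bit sign-extended address space
    # onto 2**32 tracked pages (a round-trip sketch, not in the original
    # file): the top of the negative half maps to the last page index, and
    # a non-sign-extended address is rejected:
    #
    #   mem = MemMMap()
    #   idx = mem.addr_to_mmap_page_idx(0xFFFF_FFFF_FFFF_0000)
    #   assert idx == _PAGE_COUNT - 1
    #   mem.addr_to_mmap_page_idx(1 << 48)   # raises MemException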

    def __reduce_ex__(self, protocol):
        raise PicklingError("MemMMap can't be deep-copied or pickled")

    def __access_addr_range_err(self, start_addr, size, needed_flag):
        assert needed_flag != _MMapPageFlags.W, \
            f"can't write to address 0x{start_addr:X} size 0x{size:X}"
        return None, 0

    def __access_addr_range(self, start_addr, size, needed_flag):
        assert size > 0, "invalid size"
        page_idx = self.addr_to_mmap_page_idx(start_addr)
        last_addr = start_addr + size - 1
        last_page_idx = self.addr_to_mmap_page_idx(last_addr)
        block_addr = start_addr % BLOCK_SIZE
        block_k = start_addr - block_addr
        last_block_addr = last_addr % BLOCK_SIZE
        last_block_k = last_addr - last_block_addr
        if block_k != last_block_k:
            return self.__access_addr_range_err(start_addr, size, needed_flag)
        for i in range(page_idx, last_page_idx + 1):
            flags = self.page_flags.get(i, 0)
            if flags & needed_flag == 0:
                return self.__access_addr_range_err(
                    start_addr, size, needed_flag)
            if needed_flag is _MMapPageFlags.W:
                # mark every page the write touches, not just the first
                self.modified_pages.add(i)
        return self.mem_blocks[block_k], block_addr

    def get_ctypes(self, start_addr, size, is_write):
        """ returns a ctypes ubyte array referring to the memory at
        `start_addr` with size `size`
        """
        flag = _MMapPageFlags.W if is_write else _MMapPageFlags.R
        block, block_addr = self.__access_addr_range(start_addr, size, flag)
        assert block is not None, \
            f"can't read from address 0x{start_addr:X} size 0x{size:X}"
        return (ctypes.c_ubyte * size).from_buffer(block, block_addr)
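
    # Zero-copy access sketch (illustrative, not from the original file):
    # the returned ctypes array aliases the underlying mmap block, so writes
    # through it are immediately visible to ld():
    #
    #   mem = MemMMap()
    #   buf = mem.get_ctypes(0x0, 4, is_write=True)
    #   buf[0] = 0x12
    #   assert mem.ld(0x0, width=1, swap=False) == 0x12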

    def _read_word(self, word_idx, reason):
        block, block_addr = self.__access_addr_range(
            word_idx * self.bytes_per_word, self.bytes_per_word,
            reason.needed_mmap_page_flag)
        if block is None:
            return reason.read_default
        bytes_ = block[block_addr:block_addr + self.bytes_per_word]
        return int.from_bytes(bytes_, 'little')

    def _write_word(self, word_idx, value):
        block, block_addr = self.__access_addr_range(
            word_idx * self.bytes_per_word, self.bytes_per_word,
            _MMapPageFlags.W)
        bytes_ = value.to_bytes(self.bytes_per_word, 'little')
        block[block_addr:block_addr + self.bytes_per_word] = bytes_

    def word_idxs(self):
        zeros = bytes(self.bytes_per_word)
        for page_idx in self.modified_pages:
            start = self.mmap_page_idx_to_addr(page_idx)
            block, block_addr = self.__access_addr_range(
                start, _MMAP_PAGE_SIZE, _MMapPageFlags.R)
            end = start + _MMAP_PAGE_SIZE
            for word_idx in range(start // self.bytes_per_word,
                                  end // self.bytes_per_word):
                next_block_addr = block_addr + self.bytes_per_word
                bytes_ = block[block_addr:next_block_addr]
                block_addr = next_block_addr
                if bytes_ != zeros:
                    yield word_idx
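
# End-to-end usage sketch (illustrative only, not part of the module):
#
#   mem = MemMMap()                   # pages start out RWX by default
#   mem.st(0x1000, 0xdeadbeef, width=4, swap=False)
#   assert mem.ld(0x1000, width=4, swap=False) == 0xdeadbeef
#   mem.log_fancy(name="after store")   # hex + ASCII dump via the logger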