From 00a153e29456dbfcaa339a9d1b4481873c3d40d4 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Sun, 26 Nov 2023 19:13:25 -0800 Subject: [PATCH] implement MemMMap.mmap_syscall --- src/openpower/decoder/isa/mem.py | 486 +++++++++++++++++++++++++++++-- 1 file changed, 454 insertions(+), 32 deletions(-) diff --git a/src/openpower/decoder/isa/mem.py b/src/openpower/decoder/isa/mem.py index 7b8c2dda..9ca49900 100644 --- a/src/openpower/decoder/isa/mem.py +++ b/src/openpower/decoder/isa/mem.py @@ -21,6 +21,10 @@ from cached_property import cached_property import mmap from pickle import PicklingError import ctypes +from nmutil import plain_data +from pathlib import Path +from openpower.syscalls import ppc_flags +import os def swap_order(x, nbytes): @@ -71,8 +75,8 @@ class _ReadReason(enum.Enum): @cached_property def needed_mmap_page_flag(self): if self is self.Execute: - return _MMapPageFlags.X - return _MMapPageFlags.R + return MMapPageFlags.X + return MMapPageFlags.R class MemCommon: @@ -284,7 +288,7 @@ class Mem(MemCommon): return self.mem.keys() -class _MMapPageFlags(enum.IntFlag): +class MMapPageFlags(enum.IntFlag): """ flags on each mmap-ped page Note: these are *not* PowerISA MMU pages, but instead internal to Mem so @@ -295,59 +299,476 @@ class _MMapPageFlags(enum.IntFlag): X = 4 "readable when instr_fetch=True" + S = 8 + "shared -- aka. 
not copy-on-write" + + GROWS_DOWN = 16 + """this memory block will grow when the address one page before the + beginning is accessed""" + RWX = R | W | X + NONE = 0 + +_ALLOWED_MMAP_NORMAL_FLAGS = MMapPageFlags.RWX | MMapPageFlags.S +_ALLOWED_MMAP_STACK_FLAGS = MMapPageFlags.RWX | MMapPageFlags.GROWS_DOWN -_MMAP_PAGE_SIZE = 1 << 16 # size of chunk that we track -_PAGE_COUNT = (1 << 48) // _MMAP_PAGE_SIZE # 48-bit address space + +MMAP_PAGE_SIZE = 1 << 16 # size of chunk that we track +_PAGE_COUNT = (1 << 48) // MMAP_PAGE_SIZE # 48-bit address space _NEG_PG_IDX_START = _PAGE_COUNT // 2 # start of negative half of address space # code assumes BLOCK_SIZE is a power of two # BLOCK_SIZE = 1 << 32 BLOCK_SIZE = 1 << 28 # reduced so it works on armv7a -assert BLOCK_SIZE % _MMAP_PAGE_SIZE == 0 +assert BLOCK_SIZE % MMAP_PAGE_SIZE == 0 DEFAULT_BLOCK_ADDRS = ( 0, # low end of user space 2 ** 47 - BLOCK_SIZE, # high end of user space ) +@plain_data.plain_data(frozen=True, unsafe_hash=True) +class MMapEmuBlock: + __slots__ = ("addrs", "flags", "file", "file_off") + + def __init__(self, addrs, flags=MMapPageFlags.NONE, file=None, file_off=0): + # type: (range, MMapPageFlags, Path | str | None, int) -> None + if addrs.step != 1: + raise ValueError("bad address range, step must be 1") + if len(addrs) <= 0: + raise ValueError("bad address range, must be non-empty") + if addrs.start < 0: + raise ValueError("bad address range, must be non-negative") + if addrs.stop > 2 ** 64: + raise ValueError("bad address range -- goes beyond 2 ** 64") + if addrs.start % MMAP_PAGE_SIZE: + raise ValueError("bad address range -- start isn't page-aligned") + if addrs.stop % MMAP_PAGE_SIZE: + raise ValueError("bad address range -- stop isn't page-aligned") + if addrs[0] // BLOCK_SIZE != addrs[-1] // BLOCK_SIZE: + raise ValueError( + "bad address range -- crosses underlying block boundaries") + if file is not None: + if file_off < 0: + raise ValueError("bad file_off, must be non-negative") + if file_off 
% MMAP_PAGE_SIZE: + raise ValueError("bad file_off, must be page-aligned") + if flags & ~_ALLOWED_MMAP_NORMAL_FLAGS: + raise ValueError("invalid flags for mmap with file") + file = Path(file) + else: + if flags & ~_ALLOWED_MMAP_NORMAL_FLAGS: + if flags & ~_ALLOWED_MMAP_STACK_FLAGS: + raise ValueError("invalid flags for anonymous mmap") + file_off = 0 # no file -- clear offset + self.addrs = addrs + self.flags = flags + self.file = file + self.file_off = file_off + self.page_indexes # check that addresses can be mapped to pages + + def intersects(self, other): + # type: (MMapEmuBlock) -> bool + return (other.addrs.start < self.addrs.stop + and self.addrs.start < other.addrs.stop) + + @property + def is_private_mem(self): + return self.file is None and not self.flags & MMapPageFlags.S + + @property + def underlying_block_key(self): + offset = self.addrs.start % BLOCK_SIZE + return self.addrs.start - offset + + @property + def underlying_block_offsets(self): + start = self.addrs.start % BLOCK_SIZE + return range(start, start + len(self.addrs)) + + @property + def page_indexes(self): + first_page = MemMMap.addr_to_mmap_page_idx(self.addrs[0]) + # can't just use stop, since that may be out-of-range + last_page = MemMMap.addr_to_mmap_page_idx(self.addrs[-1]) + if first_page < _NEG_PG_IDX_START and last_page >= _NEG_PG_IDX_START: + raise ValueError( + "bad address range, crosses transition from positive " + "canonical addresses to negative canonical addresses") + return range(first_page, last_page + 1) + + def difference(self, remove): + # type: (MMapEmuBlock) -> list[MMapEmuBlock] + """returns the blocks left after removing `remove` from `self`""" + if not self.intersects(remove): + return [self] + retval = [] + addrs = range(self.addrs.start, remove.addrs.start) + if len(addrs): + retval.append(plain_data.replace(self, addrs=addrs)) + addrs = range(remove.addrs.stop, self.addrs.stop) + if len(addrs): + file_off = self.file_off + addrs.start - self.addrs.start + 
def len_(r):
    """ len(), but with fix for len(range(2**64)) raising OverflowError

    `len()` raises OverflowError when the result doesn't fit in the
    interpreter's native size type, which happens for very large `range`
    objects. In that case the element count is computed arithmetically.

    r -- any sized object; only `range` gets the overflow fallback.

    Returns the number of elements in `r`.

    Raises OverflowError if `len(r)` overflows and `r` is not a `range`.
    """
    try:
        return len(r)
    except OverflowError:
        if not isinstance(r, range):
            # nothing we can compute for arbitrary objects -- propagate
            raise
    # a range that overflowed len() is necessarily non-empty, so the
    # closed-form count below never has to deal with the empty case.
    if r.step > 0:
        return 1 + (r.stop - r.start - 1) // r.step
    # negative step: mirror the formula so the division stays positive
    return 1 + (r.start - r.stop - 1) // -r.step
@staticmethod
def __gaps_in(sorted_ranges, start=0, stop=2 ** 64):
    # type: (list[range] | tuple[range], int, int) -> list[range]
    """return the address ranges in `[start, stop)` not covered by any of
    `sorted_ranges`.

    sorted_ranges -- step-1 ranges, sorted by their `start`.
    start -- lowest address to consider (inclusive).
    stop -- highest address to consider (exclusive).

    Returns a list of non-empty step-1 ranges, in ascending order.
    """
    gaps = []
    cursor = start  # first address not yet known to be covered
    for r in sorted_ranges:
        gap_stop = min(r.start, stop)
        # compare bounds instead of calling len(): gaps can be larger than
        # what len() can represent (e.g. most of a 2 ** 64 address space)
        if cursor < gap_stop:
            gaps.append(range(cursor, gap_stop))
        # never move backwards, so a range nested inside an earlier one
        # can't re-open space that's already covered
        cursor = max(cursor, r.stop)
    if cursor < stop:
        gaps.append(range(cursor, stop))
    return gaps
def __mmap_emu_map_fixed(self, block, replace, dry_run):
    # type: (MMapEmuBlock, bool, bool) -> bool
    """insert the block at the fixed address passed in, replacing the
    parts of any other blocks that overlap if `replace` is `True`.

    If `dry_run`, then don't make any changes, just check if it would
    succeed.

    This function requires the caller to check `block`'s permissions and to
    perform the underlying `mmap` first.

    Returns `True` on success, `False` if `block` lies in unbacked address
    space or overlaps an existing block while `replace` is `False`.

    Raises NotImplementedError if `block` would replace (part of) the data
    block.
    """
    if block.underlying_block_key not in self.mem_blocks:
        return False  # unbacked block
    # intersecting_blocks must be separate list so we don't iterate while
    # we modify self.__mmap_emu_alloc_blocks
    intersecting_blocks = [
        b for b in self.__mmap_emu_alloc_blocks if block.intersects(b)]
    # do every check before touching any state, so a failure can never
    # leave the allocation set half-updated
    if intersecting_blocks and not replace:
        return False
    if any(self.mmap_emu_data_block == b for b in intersecting_blocks):
        # FIXME: what does linux do here?
        raise NotImplementedError(
            "mmap overlapping the data block isn't implemented")
    if dry_run:
        return True
    for b in intersecting_blocks:
        self.__mmap_emu_alloc_blocks.remove(b)
        # keep whatever part of the old block isn't covered by the new one
        self.__mmap_emu_alloc_blocks.update(b.difference(block))
    self.__mmap_emu_alloc_blocks.add(block)
    for page_idx in block.page_indexes:
        self.__page_flags[page_idx] = block.flags
    return True

def __mmap_emu_resize_map_fixed(self, block, new_size):
    # type: (MMapEmuBlock, int) -> MMapEmuBlock | None
    """resize `block` in place (same start address) to `new_size` bytes.

    Returns the resized block, `block` itself if the size is unchanged, or
    `None` if the resized block doesn't fit at that address.

    Raises NotImplementedError for blocks that aren't private anonymous
    memory. Building the resized block goes through `MMapEmuBlock`'s
    validation, so an invalid `new_size` raises ValueError.
    """
    assert block in self.__mmap_emu_alloc_blocks, \
        "can't resize unmapped block"
    if new_size == len(block.addrs):
        return block
    addrs = range(block.addrs.start, block.addrs.start + new_size)
    new_block = plain_data.replace(block, addrs=addrs)
    # temporarily take `block` out so the dry run doesn't see the new
    # block as colliding with the one it replaces
    self.__mmap_emu_alloc_blocks.remove(block)
    try:
        if not self.__mmap_emu_map_fixed(
                new_block, replace=False, dry_run=True):
            return None
    finally:
        self.__mmap_emu_alloc_blocks.add(block)
    if not block.is_private_mem:
        # FIXME: implement resizing underlying mapping
        raise NotImplementedError
    # clear newly mapped bytes (empty range when shrinking)
    clear_addrs = range(block.addrs.stop, new_block.addrs.stop)
    if len(clear_addrs):
        clear_block = MMapEmuBlock(clear_addrs)
        mem_block = self.mem_blocks[clear_block.underlying_block_key]
        assert mem_block is not None
        clear_size = len(clear_addrs)
        arr = (ctypes.c_ubyte * clear_size).from_buffer(
            mem_block, clear_block.underlying_block_offsets.start)
        ctypes.memset(arr, 0, clear_size)
    if self.mmap_emu_data_block == block:
        self.mmap_emu_data_block = new_block
    self.__mmap_emu_alloc_blocks.remove(block)
    self.__mmap_emu_alloc_blocks.add(new_block)

    if new_size < len(block.addrs):
        # shrinking -- unmap pages at end
        r = range(new_block.page_indexes.stop, block.page_indexes.stop)
        for page_idx in r:
            self.__page_flags.pop(page_idx, None)
            # discard, not remove: a page that was never written isn't in
            # modified_pages and remove() would raise KeyError
            self.modified_pages.discard(page_idx)
    else:
        # expanding -- map pages at end, they're cleared already
        r = range(block.page_indexes.stop, new_block.page_indexes.stop)
        for page_idx in r:
            self.__page_flags[page_idx] = block.flags
            self.modified_pages.discard(page_idx)  # cleared page
    return new_block

def __mmap_emu_find_free_addr(self, block):
    # type: (MMapEmuBlock) -> MMapEmuBlock | None
    """find a spot where `block` will fit, returning the new block

    Only the size of `block.addrs` is used; its address is replaced.
    Returns `None` if no gap is large enough.
    """
    # allocated entries are MMapEmuBlock (address range in `.addrs`), but
    # the unbacked entries are the plain ranges returned by __gaps_in --
    # normalize both to ranges before sorting
    used = [getattr(b, "addrs", b)
            for b in (*self.__mmap_emu_alloc_blocks,
                      *self.__mmap_emu_unbacked_blocks)]
    used.sort(key=lambda r: r.start)
    needed_size = len(block.addrs)
    biggest_gap = range(0)
    biggest_size = 0
    for gap in self.__gaps_in(used):
        # stop - start rather than len(): can't overflow for huge gaps
        gap_size = gap.stop - gap.start
        if biggest_size < gap_size:
            biggest_gap = gap
            biggest_size = gap_size
    extra_size = biggest_size - needed_size
    if extra_size < 0:
        return None  # no space anywhere
    # try to allocate in the middle of the gap, so mmaps can grow later
    offset = extra_size // 2

    # align to page -- this depends on gap being aligned already.
    #
    # rounds down offset, so no need to check size again since it can't
    # ever get closer to the end of the gap
    offset -= offset % MMAP_PAGE_SIZE
    start = biggest_gap.start + offset
    addrs = range(start, start + needed_size)
    return plain_data.replace(block, addrs=addrs)

def __mmap_emu_try_grow_down(self, addr, needed_flag):
    # type: (int, MMapPageFlags) -> bool
    """ if addr is the page just before a GROW_DOWN block, try to grow it.
    returns True if successful. """
    # FIXME: implement. Until then report "not grown" instead of raising:
    # __access_addr_range calls this for every access that lacks the
    # needed page flag and must still reach its normal error path.
    return False

def brk_syscall(self, addr):
    """emulate the `brk` syscall -- not finished yet.

    Requires `emulating_mmap=True` and a data block. Always raises
    NotImplementedError after validating those preconditions.
    """
    assert self.emulating_mmap, "brk syscall requires emulating_mmap=True"
    assert self.mmap_emu_data_block is not None, \
        "brk syscall requires a data block/segment"

    # round addr up to the nearest page
    addr_div_page_size = -(-addr // MMAP_PAGE_SIZE)  # ceil(addr / size)
    addr = addr_div_page_size * MMAP_PAGE_SIZE

    raise NotImplementedError  # FIXME: finish

def mmap_syscall(self, addr, length, prot, flags, fd, offset, is_mmap2):
    """emulate the `mmap`/`mmap2` syscalls on top of the backing blocks.

    addr, length, prot, flags, fd, offset -- the guest's syscall arguments,
        using the `ppc_flags` constants.
    is_mmap2 -- if true, `offset` is in units of 4096 bytes.

    Returns the start address of the new mapping, or a negated error
    number on failure. Errors from the host `mmap` are returned as the
    negated host `errno`.

    Raises NotImplementedError for `MAP_SHARED` mappings.
    """
    if is_mmap2:
        offset *= 4096  # specifically *not* the page size
    prot_read = bool(prot & ppc_flags.PROT_READ)
    prot_write = bool(prot & ppc_flags.PROT_WRITE)
    prot_exec = bool(prot & ppc_flags.PROT_EXEC)
    prot_all = (ppc_flags.PROT_READ | ppc_flags.PROT_WRITE
                | ppc_flags.PROT_EXEC)
    # checks based off the checks in linux
    if prot & ~prot_all:
        return -ppc_flags.EINVAL
    if offset % MMAP_PAGE_SIZE:
        return -ppc_flags.EINVAL
    if flags & ppc_flags.MAP_HUGETLB:
        # not supported
        return -ppc_flags.EINVAL
    if length <= 0 or offset < 0:
        return -ppc_flags.EINVAL
    if flags & ppc_flags.MAP_FIXED_NOREPLACE:
        flags |= ppc_flags.MAP_FIXED
    if not (flags & ppc_flags.MAP_FIXED):
        # page-align address, rounding down -- the mask must clear the
        # in-page offset bits, not keep them
        addr &= ~(MMAP_PAGE_SIZE - 1)
    # page-align length, rounding up
    length = (length + MMAP_PAGE_SIZE - 1) & ~(MMAP_PAGE_SIZE - 1)
    if length + offset >= 2 ** 64:
        # overflowed
        return -ppc_flags.ENOMEM
    block_flags = MMapPageFlags.NONE
    if prot_read:
        block_flags |= MMapPageFlags.R
    if prot_write:
        block_flags |= MMapPageFlags.W
    if prot_exec:
        block_flags |= MMapPageFlags.X
    if flags & ppc_flags.MAP_GROWSDOWN:
        block_flags |= MMapPageFlags.GROWS_DOWN
    file = None
    if fd >= 0:
        try:
            file = os.readlink("/proc/self/fd/%i" % fd)
        except IOError:
            return -ppc_flags.EBADF
    try:
        block = MMapEmuBlock(
            range(addr, addr + length), block_flags, file, offset)
    except (ValueError, MemException):
        return -ppc_flags.EINVAL
    if not (flags & ppc_flags.MAP_FIXED):
        block = self.__mmap_emu_find_free_addr(block)
        if block is None:
            return -ppc_flags.ENOMEM
    if flags & ppc_flags.MAP_LOCKED:
        return -ppc_flags.EPERM
    map_ty = flags & ppc_flags.MAP_TYPE
    if file is not None:
        # mirrors the switch on the mapping type in linux's mmap
        if map_ty in (ppc_flags.MAP_SHARED, ppc_flags.MAP_SHARED_VALIDATE):
            if map_ty == ppc_flags.MAP_SHARED:
                # legacy MAP_SHARED silently ignores unknown flags
                flags &= LEGACY_MAP_MASK
            if flags & ~LEGACY_MAP_MASK:
                return -ppc_flags.EOPNOTSUPP
            raise NotImplementedError("MAP_SHARED on file")
        elif map_ty == ppc_flags.MAP_PRIVATE:
            if flags & _MAP_GROWS:
                return -ppc_flags.EINVAL
        else:
            return -ppc_flags.EINVAL
    elif map_ty == ppc_flags.MAP_SHARED:
        if flags & _MAP_GROWS:
            return -ppc_flags.EINVAL
        raise NotImplementedError("MAP_SHARED on memory")
    elif map_ty != ppc_flags.MAP_PRIVATE:
        return -ppc_flags.EINVAL
    replace = not (flags & ppc_flags.MAP_FIXED_NOREPLACE)
    if not self.__mmap_emu_map_fixed(block, replace, dry_run=True):
        # failed, was that because there's an existing memory block or
        # that was an invalid address?
        if self.__mmap_emu_map_fixed(block, replace=True, dry_run=True):
            return -ppc_flags.EEXIST  # existing memory block
        else:
            return -ppc_flags.EINVAL  # invalid address
    mblock = self.mem_blocks[block.underlying_block_key]
    offsets = block.underlying_block_offsets
    buf = (ctypes.c_ubyte * len(offsets)).from_buffer(mblock, offsets[0])
    buf_addr = ctypes.addressof(buf)
    # use_errno=True is required for ctypes.get_errno() to report the
    # errno set by the call below; without it get_errno() stays 0
    libc = ctypes.CDLL(None, use_errno=True)
    syscall = libc.syscall
    restype = syscall.restype
    argtypes = syscall.argtypes
    host_prot = ppc_flags.host_defines['PROT_READ']
    if block.flags & MMapPageFlags.W:
        # add write access on top of read, don't replace it
        host_prot |= ppc_flags.host_defines['PROT_WRITE']
    host_flags = ppc_flags.host_defines['MAP_FIXED']
    host_flags |= ppc_flags.host_defines['MAP_PRIVATE']
    if file is None:
        host_flags |= ppc_flags.host_defines['MAP_ANONYMOUS']
    try:
        syscall.restype = ctypes.c_long
        # syscall number + the 6 mmap arguments
        syscall.argtypes = (ctypes.c_long,) * 7
        call_no = ctypes.c_long(ppc_flags.host_defines['SYS_mmap'])
        res = int(syscall(
            call_no, ctypes.c_long(buf_addr), ctypes.c_long(len(offsets)),
            ctypes.c_long(host_prot), ctypes.c_long(host_flags),
            ctypes.c_long(fd), ctypes.c_long(offset)))
    finally:
        syscall.restype = restype
        syscall.argtypes = argtypes
    if res == -1:
        return -ctypes.get_errno()
    self.__mmap_emu_map_fixed(block, replace=True, dry_run=False)
    return block.addrs.start
0x{addr:X} " - f"expected 0x{expected:X}") + ("address not sign extended: 0x%X " + "expected 0x%X") % (addr, expected)) exc.dar = addr raise exc return page_idx @@ -356,8 +777,8 @@ class MemMMap(MemCommon): raise PicklingError("MemMMap can't be deep-copied or pickled") def __access_addr_range_err(self, start_addr, size, needed_flag): - assert needed_flag != _MMapPageFlags.W, \ - f"can't write to address 0x{start_addr:X} size 0x{size:X}" + assert needed_flag != MMapPageFlags.W, \ + "can't write to address 0x%X size 0x%X" % (start_addr, size) return None, 0 def __access_addr_range(self, start_addr, size, needed_flag): @@ -372,11 +793,12 @@ class MemMMap(MemCommon): if block_k != last_block_k: return self.__access_addr_range_err(start_addr, size, needed_flag) for i in range(page_idx, last_page_idx + 1): - flags = self.page_flags.get(i, 0) + flags = self.__page_flags.get(i, 0) if flags & needed_flag == 0: - return self.__access_addr_range_err( - start_addr, size, needed_flag) - if needed_flag is _MMapPageFlags.W: + if not self.__mmap_emu_try_grow_down(start_addr, needed_flag): + return self.__access_addr_range_err( + start_addr, size, needed_flag) + if needed_flag is MMapPageFlags.W: self.modified_pages.add(page_idx) return self.mem_blocks[block_k], block_addr @@ -384,7 +806,7 @@ class MemMMap(MemCommon): """ returns a ctypes ubyte array referring to the memory at `start_addr` with size `size` """ - flag = _MMapPageFlags.W if is_write else _MMapPageFlags.R + flag = MMapPageFlags.W if is_write else MMapPageFlags.R block, block_addr = self.__access_addr_range(start_addr, size, flag) assert block is not None, \ f"can't read from address 0x{start_addr:X} size 0x{size:X}" @@ -402,7 +824,7 @@ class MemMMap(MemCommon): def _write_word(self, word_idx, value): block, block_addr = self.__access_addr_range( word_idx * self.bytes_per_word, self.bytes_per_word, - _MMapPageFlags.W) + MMapPageFlags.W) bytes_ = value.to_bytes(self.bytes_per_word, 'little') block[block_addr:block_addr 
+ self.bytes_per_word] = bytes_ @@ -411,10 +833,10 @@ class MemMMap(MemCommon): for page_idx in self.modified_pages: start = self.mmap_page_idx_to_addr(page_idx) block, block_addr = self.__access_addr_range( - start, _MMAP_PAGE_SIZE, _MMapPageFlags.R) - end = start + _MMAP_PAGE_SIZE + start, MMAP_PAGE_SIZE, MMapPageFlags.R) + end = start + MMAP_PAGE_SIZE for word_idx in range(start // self.bytes_per_word, - end // self.bytes_per_word): + end // self.bytes_per_word): next_block_addr = block_addr + self.bytes_per_word bytes_ = block[block_addr:next_block_addr] block_addr = next_block_addr -- 2.30.2