From f640d6b5c0ca5ae72d70cdaa95cda4f7e68e7e60 Mon Sep 17 00:00:00 2001
From: Jacob Lifshay
Date: Tue, 23 Aug 2022 02:19:19 -0700
Subject: [PATCH] add WIP svp64 utf-8 validation algorithm

---
 src/openpower/test/algorithms/__init__.py     |   0
 .../test/algorithms/svp64_utf_8_validation.py | 231 ++++++++++++++++++
 2 files changed, 231 insertions(+)
 create mode 100644 src/openpower/test/algorithms/__init__.py
 create mode 100644 src/openpower/test/algorithms/svp64_utf_8_validation.py

diff --git a/src/openpower/test/algorithms/__init__.py b/src/openpower/test/algorithms/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/openpower/test/algorithms/svp64_utf_8_validation.py b/src/openpower/test/algorithms/svp64_utf_8_validation.py
new file mode 100644
index 00000000..dbb4f1df
--- /dev/null
+++ b/src/openpower/test/algorithms/svp64_utf_8_validation.py
@@ -0,0 +1,231 @@
+# SPDX-License-Identifier: LGPL-3-or-later
+# Copyright 2022 Jacob Lifshay
+
+from openpower.decoder.selectable_int import SelectableInt
+from openpower.simulator.program import Program
+from openpower.test.common import TestAccumulatorBase
+from openpower.test.state import ExpectedState
+from openpower.sv.trans.svp64 import SVP64Asm
+
+
+def svp64_utf8_validation_asm():
+    """Return the SVP64 assembly listing for the UTF-8 validation routine.
+
+    The listing is work-in-progress: the per-chunk validation is still
+    missing, so calling this function currently always raises.
+
+    As set up by `SVP64UTF8ValidationTestCase.run_case`, the routine gets
+    the address of the bytes in r3 and their length in r4, and is expected
+    to leave 1 in r3 for valid UTF-8 or 0 otherwise.
+
+    Raises:
+        NotImplementedError: always, until the algorithm is finished.
+    """
+    # deliberately refuse to hand out the unfinished listing below
+    raise NotImplementedError("not finished")
+    return [
+        "setvl 0, 0, 32, 0, 1, 1",  # set VL to 32
+        "sv.addi *64, 0, 0",  # clear prev iter's bytes
+        "loop:",
+        "setvl. 0, 0, 32, 0, 1, 1",  # set VL to 32
+        "sv.ori *32, *64, 0",  # copy prev iter's bytes
+
+        # clear cur iter's bytes, so bytes beyond end end up being zeros
+        "sv.addi *64, 0, 0",
+        "setvl 5, 4, 32, 0, 1, 1",  # set VL to min(32, r4)
+        "sv.lbz/els *64, 0(3)",  # load bytes
+        "setvl 0, 0, 32, 0, 1, 1",  # set VL to 32
+        # now we can operate on 32 byte chunks, branch to `fail` if they don't
+        # pass validation.
+        # TODO: finish
+        # branch at end, so we check last bytes from prev iter first
+        "bne loop",
+        "li 3, 1",
+        "blr",
+        "fail:",
+        "li 3, 0",
+        "blr",
+    ]
+
+
+class SVP64UTF8ValidationTestCase(TestAccumulatorBase):
+    """Test cases checking the SVP64 routine against Python's UTF-8 codec.
+
+    Each `case_*` method passes one interesting byte sequence (boundary
+    code points, overlong encodings, surrogates, too-big encodings) to
+    `run_case` or `run_cases`.
+    """
+
+    def run_case(self, data):
+        # type: (bytes) -> None
+        """Add one test case that validates exactly the bytes in `data`.
+
+        The expected r3 result (1 for valid, 0 for invalid) is taken from
+        Python's strict UTF-8 decoder.
+        """
+        expected = 1
+        try:
+            data.decode("utf-8")
+        except UnicodeDecodeError:
+            expected = 0
+        isa = SVP64Asm(svp64_utf8_validation_asm())
+        lst = list(isa)
+        initial_regs = [0x15cee3293aa9bfbe] * 128  # fill with junk
+        initial_regs[3] = 0x10000  # pointer to bytes to check
+        initial_regs[4] = len(data)  # length of bytes to check
+
+        # one entry per byte at r3 + i; the 1 is presumably the byte width
+        initial_mem = {}
+        for i, v in enumerate(data):
+            initial_mem[i + initial_regs[3]] = v, 1
+        stop_at_pc = 0x10000000
+        # SPR 8 is LR, so the routine's final `blr` branches to stop_at_pc
+        initial_sprs = {8: SelectableInt(stop_at_pc, 64)}
+        e = ExpectedState(pc=stop_at_pc)
+        e.intregs[3] = expected
+        self.add_case(Program(lst, 0), initial_regs, initial_mem=initial_mem,
+                      initial_sprs=initial_sprs, stop_at_pc=stop_at_pc,
+                      expected=e)
+
+    def run_cases(self, data):
+        # type: (bytes | str) -> None
+        """Add test cases for slices of `data` padded with ASCII spaces.
+
+        `data` gets 8 spaces prepended and appended; every proper prefix
+        of every non-empty suffix of the result is passed to `run_case`,
+        so each sequence is also checked truncated and at varying offsets.
+
+        A `str` is encoded as UTF-8 with the "surrogatepass" error handler,
+        so a lone surrogate such as U+D800 becomes the invalid bytes
+        ED A0 80 instead of raising UnicodeEncodeError.
+        """
+        if isinstance(data, str):
+            # strict encoding rejects lone surrogates, which made the
+            # surrogate cases crash before adding any test case
+            data = data.encode("utf-8", errors="surrogatepass")
+        data = b' ' * 8 + data + b' ' * 8
+        for i in range(len(data)):
+            part = data[i:]
+            for j in range(len(part)):
+                self.run_case(part[:j])
+
+    def case_empty(self):
+        self.run_case(b"")
+
+    def case_nul(self):
+        self.run_cases("\u0000")  # min 1-byte
+
+    def case_a(self):
+        self.run_cases("a")
+
+    def case_7f(self):
+        self.run_cases("\u007F")  # max 1-byte
+
+    def case_c0_80(self):
+        self.run_cases(b"\xC0\x80")  # min 2-byte overlong encoding
+
+    def case_c1_bf(self):
+        self.run_cases(b"\xC1\xBF")  # max 2-byte overlong encoding
+
+    def case_u0080(self):
+        self.run_cases("\u0080")  # min 2-byte
+
+    def case_u07ff(self):
+        self.run_cases("\u07FF")  # max 2-byte
+
+    def case_e0_80_80(self):
+        self.run_cases(b"\xE0\x80\x80")  # min 3-byte overlong encoding
+
+    def case_e0_9f_bf(self):
+        self.run_cases(b"\xE0\x9F\xBF")  # max 3-byte overlong encoding
+
+    def case_u0800(self):
+        self.run_cases("\u0800")  # min 3-byte
+
+    def case_u0fff(self):
+        self.run_cases("\u0FFF")
+
+    def case_u1000(self):
+        self.run_cases("\u1000")
+
+    def case_ucfff(self):
+        self.run_cases("\uCFFF")
+
+    def case_ud000(self):
+        self.run_cases("\uD000")
+
+    def case_ud7ff(self):
+        self.run_cases("\uD7FF")
+
+    def case_ud800(self):
+        self.run_cases("\uD800")  # surrogate
+
+    def case_udbff(self):
+        self.run_cases("\uDBFF")  # surrogate
+
+    def case_udc00(self):
+        self.run_cases("\uDC00")  # surrogate
+
+    def case_udfff(self):
+        self.run_cases("\uDFFF")  # surrogate
+
+    def case_ue000(self):
+        self.run_cases("\uE000")
+
+    def case_uffff(self):
+        self.run_cases("\uFFFF")  # max 3-byte
+
+    def case_f0_80_80_80(self):
+        self.run_cases(b"\xF0\x80\x80\x80")  # min 4-byte overlong encoding
+
+    def case_f0_bf_bf_bf(self):
+        # NOTE: the name says F0 BF BF BF, but the bytes are F0 8F BF BF
+        self.run_cases(b"\xF0\x8F\xBF\xBF")  # max 4-byte overlong encoding
+
+    def case_u00010000(self):
+        self.run_cases("\U00010000")  # min 4-byte
+
+    def case_u0003ffff(self):
+        self.run_cases("\U0003FFFF")
+
+    def case_u00040000(self):
+        self.run_cases("\U00040000")
+
+    def case_u000fffff(self):
+        self.run_cases("\U000FFFFF")
+
+    def case_u00100000(self):
+        self.run_cases("\U00100000")
+
+    def case_u0010ffff(self):
+        self.run_cases("\U0010FFFF")  # max 4-byte
+
+    def case_f4_90_80_80(self):
+        self.run_cases(b"\xF4\x90\x80\x80")  # first too-big encoding
+
+    def case_f7_bf_bf_bf(self):
+        self.run_cases(b"\xF7\xBF\xBF\xBF")  # max too-big 4-byte encoding
+
+    def case_f8_x4_80(self):
+        self.run_cases(b"\xF8" + b"\x80" * 4)  # min too-big 5-byte encoding
+
+    def case_fb_x4_bf(self):
+        self.run_cases(b"\xFB" + b"\xBF" * 4)  # max too-big 5-byte encoding
+
+    def case_fc_x5_80(self):
+        self.run_cases(b"\xFC" + b"\x80" * 5)  # min too-big 6-byte encoding
+
+    def case_fd_x5_bf(self):
+        self.run_cases(b"\xFD" + b"\xBF" * 5)  # max too-big 6-byte encoding
+
+    def case_fe_x6_80(self):
+        self.run_cases(b"\xFE" + b"\x80" * 6)  # min too-big 7-byte encoding
+
+    def case_fe_x6_bf(self):
+        self.run_cases(b"\xFE" + b"\xBF" * 6)  # max too-big 7-byte encoding
+
+    def case_ff_x7_80(self):
+        self.run_cases(b"\xFF" + b"\x80" * 7)  # min too-big 8-byte encoding
+
+    def case_ff_x7_bf(self):
+        self.run_cases(b"\xFF" + b"\xBF" * 7)  # max too-big 8-byte encoding
-- 
2.30.2