add WIP svp64 utf-8 validation algorithm
authorJacob Lifshay <programmerjake@gmail.com>
Tue, 23 Aug 2022 09:19:19 +0000 (02:19 -0700)
committerJacob Lifshay <programmerjake@gmail.com>
Tue, 23 Aug 2022 09:19:19 +0000 (02:19 -0700)
src/openpower/test/algorithms/__init__.py [new file with mode: 0644]
src/openpower/test/algorithms/svp64_utf_8_validation.py [new file with mode: 0644]

diff --git a/src/openpower/test/algorithms/__init__.py b/src/openpower/test/algorithms/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/openpower/test/algorithms/svp64_utf_8_validation.py b/src/openpower/test/algorithms/svp64_utf_8_validation.py
new file mode 100644 (file)
index 0000000..dbb4f1d
--- /dev/null
@@ -0,0 +1,191 @@
+# SPDX-License-Identifier: LGPL-3-or-later
+# Copyright 2022 Jacob Lifshay
+
+from openpower.decoder.selectable_int import SelectableInt
+from openpower.simulator.program import Program
+from openpower.test.common import TestAccumulatorBase
+from openpower.test.state import ExpectedState
+from openpower.sv.trans.svp64 import SVP64Asm
+
+
+def svp64_utf8_validation_asm():
+    raise NotImplementedError("not finished")
+    return [
+        "setvl 0, 0, 32, 0, 1, 1",  # set VL to 32
+        "sv.addi *64, 0, 0",  # clear prev iter's bytes
+        "loop:",
+        "setvl. 0, 0, 32, 0, 1, 1",  # set VL to 32
+        "sv.ori *32, *64, 0",  # copy prev iter's bytes
+
+        # clear cur iter's bytes, so bytes beyond end end up being zeros
+        "sv.addi *64, 0, 0",
+        "setvl 5, 4, 32, 0, 1, 1",  # set VL to min(32, r4)
+        "sv.lbz/els *64, 0(3)",  # load bytes
+        "setvl 0, 0, 32, 0, 1, 1",  # set VL to 32
+        # now we can operate on 32 byte chunks, branch to `fail` if they don't
+        # pass validation.
+        # TODO: finish
+        # branch at end, so we check last bytes from prev iter first
+        "bne loop",
+        "li 3, 1",
+        "blr",
+        "fail:",
+        "li 3, 0",
+        "blr",
+    ]
+
+
+class SVP64UTF8ValidationTestCase(TestAccumulatorBase):
+    def run_case(self, data):
+        # type: (bytes) -> None
+        expected = 1
+        try:
+            data.decode("utf-8")
+        except UnicodeDecodeError:
+            expected = 0
+        isa = SVP64Asm(svp64_utf8_validation_asm())
+        lst = list(isa)
+        initial_regs = [0x15cee3293aa9bfbe] * 128  # fill with junk
+        initial_regs[3] = 0x10000  # pointer to bytes to check
+        initial_regs[4] = len(data)  # length of bytes to check
+
+        initial_mem = {}
+        for i, v in enumerate(data):
+            initial_mem[i + initial_regs[3]] = v, 1
+        stop_at_pc = 0x10000000
+        initial_sprs = {8: SelectableInt(stop_at_pc, 64)}
+        e = ExpectedState(pc=stop_at_pc)
+        e.intregs[3] = expected
+        self.add_case(Program(lst, 0), initial_regs, initial_mem=initial_mem,
+                      initial_sprs=initial_sprs, stop_at_pc=stop_at_pc,
+                      expected=e)
+
+    def run_cases(self, data):
+        # type: (bytes | str) -> None
+        if isinstance(data, str):
+            data = data.encode("utf-8")
+        data = b' ' * 8 + data + b' ' * 8
+        for i in range(len(data)):
+            part = data[i:]
+            for j in range(len(part)):
+                self.run_case(part[:j])
+
+    def case_empty(self):
+        self.run_case(b"")
+
+    def case_nul(self):
+        self.run_cases("\u0000")  # min 1-byte
+
+    def case_a(self):
+        self.run_cases("a")
+
+    def case_7f(self):
+        self.run_cases("\u007F")  # max 1-byte
+
+    def case_c0_80(self):
+        self.run_cases(b"\xC0\x80")  # min 2-byte overlong encoding
+
+    def case_c1_bf(self):
+        self.run_cases(b"\xC1\xBF")  # max 2-byte overlong encoding
+
+    def case_u0080(self):
+        self.run_cases("\u0080")  # min 2-byte
+
+    def case_u07ff(self):
+        self.run_cases("\u07FF")  # max 2-byte
+
+    def case_e0_80_80(self):
+        self.run_cases(b"\xE0\x80\x80")  # min 3-byte overlong encoding
+
+    def case_e0_9f_bf(self):
+        self.run_cases(b"\xE0\x9F\xBF")  # max 3-byte overlong encoding
+
+    def case_u0800(self):
+        self.run_cases("\u0800")  # min 3-byte
+
+    def case_u0fff(self):
+        self.run_cases("\u0FFF")
+
+    def case_u1000(self):
+        self.run_cases("\u1000")
+
+    def case_ucfff(self):
+        self.run_cases("\uCFFF")
+
+    def case_ud000(self):
+        self.run_cases("\uD000")
+
+    def case_ud7ff(self):
+        self.run_cases("\uD7FF")
+
+    def case_ud800(self):
+        self.run_cases("\uD800")  # surrogate
+
+    def case_udbff(self):
+        self.run_cases("\uDBFF")  # surrogate
+
+    def case_udc00(self):
+        self.run_cases("\uDC00")  # surrogate
+
+    def case_udfff(self):
+        self.run_cases("\uDFFF")  # surrogate
+
+    def case_ue000(self):
+        self.run_cases("\uE000")
+
+    def case_uffff(self):
+        self.run_cases("\uFFFF")  # max 3-byte
+
+    def case_f0_80_80_80(self):
+        self.run_cases(b"\xF0\x80\x80\x80")  # min 4-byte overlong encoding
+
+    def case_f0_bf_bf_bf(self):
+        self.run_cases(b"\xF0\x8F\xBF\xBF")  # max 4-byte overlong encoding
+
+    def case_u00010000(self):
+        self.run_cases("\U00010000")  # min 4-byte
+
+    def case_u0003ffff(self):
+        self.run_cases("\U0003FFFF")
+
+    def case_u00040000(self):
+        self.run_cases("\U00040000")
+
+    def case_u000fffff(self):
+        self.run_cases("\U000FFFFF")
+
+    def case_u00100000(self):
+        self.run_cases("\U00100000")
+
+    def case_u0010ffff(self):
+        self.run_cases("\U0010FFFF")  # max 4-byte
+
+    def case_f4_90_80_80(self):
+        self.run_cases(b"\xF4\x90\x80\x80")  # first too-big encoding
+
+    def case_f7_bf_bf_bf(self):
+        self.run_cases(b"\xF7\xBF\xBF\xBF")  # max too-big 4-byte encoding
+
+    def case_f8_x4_80(self):
+        self.run_cases(b"\xF8" + b"\x80" * 4)  # min too-big 5-byte encoding
+
+    def case_fb_x4_bf(self):
+        self.run_cases(b"\xFB" + b"\xBF" * 4)  # max too-big 5-byte encoding
+
+    def case_fc_x5_80(self):
+        self.run_cases(b"\xFC" + b"\x80" * 5)  # min too-big 6-byte encoding
+
+    def case_fd_x5_bf(self):
+        self.run_cases(b"\xFD" + b"\xBF" * 5)  # max too-big 6-byte encoding
+
+    def case_fe_x6_80(self):
+        self.run_cases(b"\xFE" + b"\x80" * 6)  # min too-big 7-byte encoding
+
+    def case_fe_x6_bf(self):
+        self.run_cases(b"\xFE" + b"\xBF" * 6)  # max too-big 7-byte encoding
+
+    def case_ff_x7_80(self):
+        self.run_cases(b"\xFF" + b"\x80" * 7)  # min too-big 8-byte encoding
+
+    def case_ff_x7_bf(self):
+        self.run_cases(b"\xFF" + b"\xBF" * 7)  # max too-big 8-byte encoding