From 1e445b5efce833d158950c5084d8ee1dce0be0f8 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Wed, 24 Aug 2022 04:36:25 -0700 Subject: [PATCH] working on svp64 utf-8 validation -- still broken --- .../isa/test_caller_svp64_utf_8_validation.py | 43 +--- .../test/algorithms/svp64_utf_8_validation.py | 236 ++++++++++++------ 2 files changed, 174 insertions(+), 105 deletions(-) diff --git a/src/openpower/decoder/isa/test_caller_svp64_utf_8_validation.py b/src/openpower/decoder/isa/test_caller_svp64_utf_8_validation.py index 37fbdf41..e14b12c5 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_utf_8_validation.py +++ b/src/openpower/decoder/isa/test_caller_svp64_utf_8_validation.py @@ -1,39 +1,22 @@ -""" Decoder tests - -related bugs: - - * -""" +# SPDX-License-Identifier: LGPL-3-or-later +# Copyright 2022 Jacob Lifshay import unittest -import sys - -# These tests utilize the run_hdl=False parameter to compare -# simulator with expected states -from soc.simple.test.test_runner import TestRunner from openpower.test.algorithms.svp64_utf_8_validation import \ SVP64UTF8ValidationTestCase +from openpower.test.runner import TestRunnerBase +# writing the test_caller invocation this way makes it work with pytest -if __name__ == "__main__": - - # allow list of testing to be selected by command-line - testing = sys.argv[1:] - sys.argv = sys.argv[:1] - - if not testing: - testing = ['utf-8_validation'] +class TestSVP64UTF8Validation(TestRunnerBase): + def __init__(self, test): + assert test == 'test' + super().__init__(SVP64UTF8ValidationTestCase().test_data) - unittest.main(exit=False) - suite = unittest.TestSuite() + def test(self): + # dummy function to make unittest try to test this class + pass - # dictionary of data for tests - tests = {'utf-8_validation': SVP64UTF8ValidationTestCase().test_data} - # walk through all tests, those requested get added - for tname, data in tests.items(): - if tname in testing: - suite.addTest(TestRunner(data, run_hdl=False)) - - runner = unittest.TextTestRunner() - runner.run(suite) +if __name__ == "__main__": + unittest.main() diff --git a/src/openpower/test/algorithms/svp64_utf_8_validation.py b/src/openpower/test/algorithms/svp64_utf_8_validation.py index efa5dcfe..fd72efef 100644 --- a/src/openpower/test/algorithms/svp64_utf_8_validation.py +++ b/src/openpower/test/algorithms/svp64_utf_8_validation.py @@ -2,11 +2,13 @@ # Copyright 2022 Jacob Lifshay import enum +import re from openpower.decoder.selectable_int import SelectableInt from openpower.simulator.program import Program -from openpower.test.common import TestAccumulatorBase +from openpower.test.common import TestAccumulatorBase, skip_case from openpower.test.state import ExpectedState from openpower.sv.trans.svp64 import SVP64Asm +from cached_property import cached_property SVP64_UTF8_VALIDATION_DATA_ADDR = 0x10000 @@ -131,107 +133,149 @@ SECOND_BYTE_HIGH_NIBBLE_LUT = [ def svp64_utf8_validation_asm(): - # raise NotImplementedError("not finished") + vec_sz = 8 # limited by number of CR fields implemented in the simulator + inp_addr = 3 + # cur bytes in r48-r63 -- u64x16 + cur_bytes = 48 + # prev bytes in r45-r47 -- u64x3 + prev_bytes_sz = 3 + prev_bytes = cur_bytes - prev_bytes_sz + # error flags in r56-r63 -- u64x8 + temp_vec1 = cur_bytes + vec_sz + # nibbles to look up in r64-r71 -- u64x8 + temp_vec2 = cur_bytes + vec_sz * 2 + temp_vec2_end = temp_vec2 + vec_sz return [ # input addr in r3, input length in r4 - # prev bytes in r45-r47 -- u64x3 - # cur bytes in r48-r63 -- u64x16 - # nibbles to look up in r80-r95 -- u64x16 - # error flags in r64-r79 -- u64x16 - "setvl 0, 0, 3, 0, 1, 1", # set VL to 3 - "sv.addi *45, 0, 0", # clear prev bytes - f"lis 6, {FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}", + f"setvl 0, 0, {prev_bytes_sz}, 0, 1, 1", # set VL to prev_bytes_sz + f"sv.addi *{prev_bytes}, 0, 0", # clear prev bytes + f"addis 6, 0, {FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}", f"ori 6, 6, {FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR & 0xFFFF}", - f"lis 7, {FIRST_BYTE_LOW_NIBBLE_LUT_ADDR >> 16}", + f"addis 7, 0, {FIRST_BYTE_LOW_NIBBLE_LUT_ADDR >> 16}", f"ori 7, 7, {FIRST_BYTE_LOW_NIBBLE_LUT_ADDR & 0xFFFF}", - f"lis 8, {SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}", + f"addis 8, 0, {SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}", f"ori 8, 8, {SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR & 0xFFFF}", - "loop:", - "setvl 0, 0, 3, 0, 1, 1", # set VL to 3 - "sv.ori *45, *61, 0", # copy prev bytes from end of cur bytes + f"loop:", + f"setvl 0, 0, {prev_bytes_sz}, 0, 1, 1", # set VL to prev_bytes_sz + # copy prev bytes from end of cur bytes + f"sv.ori *{prev_bytes}, *{cur_bytes + vec_sz - prev_bytes_sz}, 0", # clear cur bytes, so bytes beyond end end up being zeros - "setvl 0, 0, 16, 0, 1, 1", # set VL to 16 - "sv.addi *48, 0, 0", # clear cur bytes - "setvl. 5, 4, 16, 0, 1, 1", # set VL to min(16, r4) - "beq final_check", # if no bytes left to load, run final check - "sv.lbz/els *48, 0(3)", # load bytes - "setvl 0, 0, 16, 0, 1, 1", # set VL to 16 - # now we can operate on 16 byte chunks, branch to `fail` if they don't - # pass validation. + f"setvl 0, 0, {vec_sz}, 0, 1, 1", # set VL to vec_sz + f"sv.addi *{cur_bytes}, 0, 0", # clear cur bytes + f"setvl. 5, 4, {vec_sz}, 0, 1, 1", # set VL to min(vec_sz, r4) + # if no bytes left to load, run final check + f"bc 12, 2, final_check", # beq final_check + f"sv.lbz/els *{cur_bytes}, 0({inp_addr})", # load bytes + f"setvl 0, 0, {vec_sz}, 0, 1, 1", # set VL to vec_sz + # now we can operate on vec_sz byte chunks, branch to `fail` if they + # don't pass validation. # get high nibbles of input shifted by 1 byte - f"sv.rldicl *80, *47, {64 - 4}, 4", # srdi *80, *47, 4 + # srdi *{temp_vec2}, *{cur_bytes - 1}, 4 + f"sv.rldicl *{temp_vec2}, *{cur_bytes - 1}, {64 - 4}, 4", # look-up nibbles in table, writing to error flags - "sv.lbzx *64, 6, *80", + f"sv.lbzx *{temp_vec1}, 6, *{temp_vec2}", # get low nibbles of input shifted by 1 byte - f"sv.andi. *80, *47, {0xF}", # there is no andi without Rc + # there is no andi without Rc + # sv.andi. with scalars is buggy, so use a temporary and sv.and + f"addi 9, 0, {0xF}", + f"sv.and *{temp_vec2}, *{cur_bytes - 1}, 9", # look-up nibbles in table - "sv.lbzx *80, 6, *80", + f"sv.lbzx *{temp_vec2}, 6, *{temp_vec2}", # bitwise and into error flags - "sv.and *64, *64, *80", + f"sv.and *{temp_vec1}, *{temp_vec1}, *{temp_vec2}", # get high nibbles of input - "sv.srdi *80, *48, 4", + # srdi *{temp_vec2}, *{cur_bytes}, 4 + f"sv.rldicl *{temp_vec2}, *{cur_bytes}, {64 - 4}, 4", # look-up nibbles in table - "sv.lbzx *80, 6, *80", + f"sv.lbzx *{temp_vec2}, 6, *{temp_vec2}", # bitwise and into error flags - "sv.and *64, *64, *80", + f"sv.and *{temp_vec1}, *{temp_vec1}, *{temp_vec2}", - # or-reduce error flags into r96 - "sv.mv *80, *64" - "sv.or *81, *80, *81", + # or-reduce error flags into temp_vec2_end + f"sv.ori *{temp_vec2}, *{temp_vec1}, 0", + f"sv.or *{temp_vec2 + 1}, *{temp_vec2}, *{temp_vec2 + 1}", # check for any actual error flags set - f"sv.andi. 96, 96, {UTF8FirstTwoBytesError.AllActualErrors}", - "bne fail", + # sv.andi. is buggy, so use sv.and, then compare + f"addi 9, 0, {UTF8FirstTwoBytesError.AllActualErrors}", + f"sv.and 9, {temp_vec2_end}, 9", + f"cmpli 0, 1, 9, 0", + f"bc 4, 2, fail", # bne fail # check for the correct number of continuation bytes for 3/4-byte cases # set bit 0x80 (TwoContinuations) if input is >= 0xE0 - f"sv.subi/satu *80, *46, {0xE0 - 0x80}", + f"sv.cmpli *0, 1, *{cur_bytes - 2}, {0xE0}", # xor into error flags - "sv.xor *64, *64, *80", + f"sv.xori/m=ge *{temp_vec1}, *{temp_vec1}, {0x80}", # set bit 0x80 (TwoContinuations) if input is >= 0xF0 - f"sv.subi/satu *80, *45, {0xF0 - 0x80}", + f"sv.cmpli *0, 1, *{cur_bytes - 2}, {0xF0}", # xor into error flags - "sv.xor *80, *64, *80", - # now bit 0x80 is set in r80-95 if there's an error - # or-reduce into r96 - "sv.or *81, *80, *81", + f"sv.xori/m=ge *{temp_vec1}, *{temp_vec1}, {0x80}", + # now bit 0x80 is set in temp_vec1 if there's an error + # or-reduce into temp_vec2 + f"sv.or *{temp_vec1 + 1}, *{temp_vec1}, *{temp_vec1 + 1}", # adjust count/pointer - "add 3, 3, 5", # increment pointer - "sub 4, 4, 5", # decrement count - f"sv.andi. 96, 96, {0x80}", # check if any errors - "beq loop", # if no errors loop, else fail - "fail:", - "li 3, 0", - "blr", - "final_check:", + f"add 3, 3, 5", # increment pointer + f"sub 4, 4, 5", # decrement count + f"sv.andi. {temp_vec2}, {temp_vec2}, {0x80}", # check if any errors + f"bc 12, 2, loop", # beq loop # if no errors loop, else fail + f"fail:", + f"addi 3, 0, 0", + f"blr", + f"final_check:", # check if prev input is incomplete # check if byte 3 bytes from end needed 4 bytes - f"sv.cmpli 0, 1, 45, {0xF0}", - "bge fail", + f"sv.cmpli 0, 1, {cur_bytes - 3}, {0xF0}", + f"bc 4, 0, fail", # bge fail # check if byte 2 bytes from end needed 3 bytes - f"sv.cmpli 0, 1, 46, {0xE0}", - "bge fail", + f"sv.cmpli 0, 1, {cur_bytes - 2}, {0xE0}", + f"bc 4, 0, fail", # bge fail # check if byte 1 bytes from end needed 2 bytes - f"sv.cmpli 0, 1, 47, {0xC0}", - "bge fail", - "li 3, 1", - "blr", + f"sv.cmpli 0, 1, {cur_bytes - 1}, {0xC0}", + f"bc 4, 0, fail", # bge fail + f"addi 3, 0, 1", + f"blr", ] +def assemble(instructions, start_pc=0): + pc = start_pc + macros = {} + out_instructions = [] + for instr in instructions: + m = re.fullmatch(r" *([a-zA-Z0-9_]+): *(#.*)?", instr) + if m is not None: + name = m.group(1) + if name in macros: + raise ValueError(f"label {name!r} defined multiple times") + macros[name] = str(pc) + continue + m = re.fullmatch(r" *sv\.[a-zA-Z0-9_].*", instr) + if m is not None: + pc += 8 + else: + pc += 4 + out_instructions.append(instr) + for k, v in macros.items(): + out_instructions.append(f".set {k}, {v}") + return Program(list(SVP64Asm(out_instructions)), 0) + + class SVP64UTF8ValidationTestCase(TestAccumulatorBase): - def run_case(self, data): - # type: (bytes) -> None + @cached_property + def program(self): + return assemble(svp64_utf8_validation_asm()) + + def run_case(self, data, src_loc_at=0): + # type: (bytes, int) -> None expected = 1 try: data.decode("utf-8") except UnicodeDecodeError: expected = 0 - isa = SVP64Asm(svp64_utf8_validation_asm()) - lst = list(isa) initial_regs = [0x15cee3293aa9bfbe] * 128 # fill with junk initial_regs[3] = 0x10000 # pointer to bytes to check initial_regs[4] = len(data) # length of bytes to check @@ -241,11 +285,14 @@ class SVP64UTF8ValidationTestCase(TestAccumulatorBase): initial_mem[i + initial_regs[3]] = v, 1 stop_at_pc = 0x10000000 initial_sprs = {8: SelectableInt(stop_at_pc, 64)} - e = ExpectedState(pc=stop_at_pc) + e = ExpectedState(pc=stop_at_pc, int_regs=4, crregs=0, fp_regs=0) + e.intregs[:3] = initial_regs[:3] e.intregs[3] = expected - self.add_case(Program(lst, 0), initial_regs, initial_mem=initial_mem, - initial_sprs=initial_sprs, stop_at_pc=stop_at_pc, - expected=e) + with self.subTest(data=repr(data), expected=expected): + self.add_case(self.program, initial_regs, initial_mem=initial_mem, + initial_sprs=initial_sprs, stop_at_pc=stop_at_pc, + expected=e, + src_loc_at=src_loc_at + 1) def run_cases(self, data): # type: (bytes | str) -> None @@ -255,124 +302,163 @@ class SVP64UTF8ValidationTestCase(TestAccumulatorBase): for i in range(len(data)): part = data[i:] for j in range(len(part)): - self.run_case(part[:j]) + self.run_case(part[:j], src_loc_at=1) def case_empty(self): self.run_case(b"") + @skip_case def case_nul(self): self.run_cases("\u0000") # min 1-byte + @skip_case def case_a(self): self.run_cases("a") + @skip_case def case_7f(self): self.run_cases("\u007F") # max 1-byte + @skip_case def case_c0_80(self): self.run_cases(b"\xC0\x80") # min 2-byte overlong encoding + @skip_case def case_c1_bf(self): self.run_cases(b"\xC1\xBF") # max 2-byte overlong encoding + @skip_case def case_u0080(self): self.run_cases("\u0080") # min 2-byte + @skip_case def case_u07ff(self): self.run_cases("\u07FF") # max 2-byte + @skip_case def case_e0_80_80(self): self.run_cases(b"\xE0\x80\x80") # min 3-byte overlong encoding + @skip_case def case_e0_9f_bf(self): self.run_cases(b"\xE0\x9F\xBF") # max 3-byte overlong encoding + @skip_case def case_u0800(self): self.run_cases("\u0800") # min 3-byte + @skip_case def case_u0fff(self): self.run_cases("\u0FFF") + @skip_case def case_u1000(self): self.run_cases("\u1000") + @skip_case def case_ucfff(self): self.run_cases("\uCFFF") + @skip_case def case_ud000(self): self.run_cases("\uD000") + @skip_case def case_ud7ff(self): self.run_cases("\uD7FF") - def case_ud800(self): - self.run_cases("\uD800") # surrogate + @skip_case + def case_ed_a0_80(self): + self.run_cases(b"\xED\xA0\x80") # first high surrogate - def case_udbff(self): - self.run_cases("\uDBFF") # surrogate + @skip_case + def case_ed_af_bf(self): + self.run_cases(b"\xED\xAF\xBF") # last high surrogate - def case_udc00(self): - self.run_cases("\uDC00") # surrogate + @skip_case + def case_ed_b0_80(self): + self.run_cases(b"\xED\xB0\x80") # first low surrogate - def case_udfff(self): - self.run_cases("\uDFFF") # surrogate + @skip_case + def case_ed_bf_bf(self): + self.run_cases(b"\xED\xBF\xBF") # last low surrogate + @skip_case def case_ue000(self): self.run_cases("\uE000") + @skip_case def case_uffff(self): self.run_cases("\uFFFF") # max 3-byte + @skip_case def case_f0_80_80_80(self): self.run_cases(b"\xF0\x80\x80\x80") # min 4-byte overlong encoding + @skip_case def case_f0_bf_bf_bf(self): self.run_cases(b"\xF0\x8F\xBF\xBF") # max 4-byte overlong encoding + @skip_case def case_u00010000(self): self.run_cases("\U00010000") # min 4-byte + @skip_case def case_u0003ffff(self): self.run_cases("\U0003FFFF") + @skip_case def case_u00040000(self): self.run_cases("\U00040000") + @skip_case def case_u000fffff(self): self.run_cases("\U000FFFFF") + @skip_case def case_u00100000(self): self.run_cases("\U00100000") + @skip_case def case_u0010ffff(self): self.run_cases("\U0010FFFF") # max 4-byte + @skip_case def case_f4_90_80_80(self): self.run_cases(b"\xF4\x90\x80\x80") # first too-big encoding + @skip_case def case_f7_bf_bf_bf(self): self.run_cases(b"\xF7\xBF\xBF\xBF") # max too-big 4-byte encoding + @skip_case def case_f8_x4_80(self): self.run_cases(b"\xF8" + b"\x80" * 4) # min too-big 5-byte encoding + @skip_case def case_fb_x4_bf(self): self.run_cases(b"\xFB" + b"\xBF" * 4) # max too-big 5-byte encoding + @skip_case def case_fc_x5_80(self): self.run_cases(b"\xFC" + b"\x80" * 5) # min too-big 6-byte encoding + @skip_case def case_fd_x5_bf(self): self.run_cases(b"\xFD" + b"\xBF" * 5) # max too-big 6-byte encoding + @skip_case def case_fe_x6_80(self): self.run_cases(b"\xFE" + b"\x80" * 6) # min too-big 7-byte encoding + @skip_case def case_fe_x6_bf(self): self.run_cases(b"\xFE" + b"\xBF" * 6) # max too-big 7-byte encoding + @skip_case def case_ff_x7_80(self): self.run_cases(b"\xFF" + b"\x80" * 7) # min too-big 8-byte encoding + @skip_case def case_ff_x7_bf(self): self.run_cases(b"\xFF" + b"\xBF" * 7) # max too-big 8-byte encoding -- 2.30.2