"""Command-line runner for the SVP64 UTF-8 validation test data.

Positional command-line arguments select which named test groups to run;
with no arguments the single group ``utf-8_validation`` is run.
"""

# Provenance: new file
# src/openpower/decoder/isa/test_caller_svp64_utf_8_validation.py from commit
# e347fb846bba92dbec07b33f08e185daad9df68b (Jacob Lifshay, Tue, 23 Aug 2022
# 22:14:25 -0700), subject "finished writing svp64 utf-8 validation
# algorithm -- still buggy".  The same commit also modifies
# src/openpower/test/algorithms/svp64_utf_8_validation.py.

import unittest
import sys

# These tests utilize the run_hdl=False parameter to compare
# simulator with expected states
from soc.simple.test.test_runner import TestRunner
from openpower.test.algorithms.svp64_utf_8_validation import (
    SVP64UTF8ValidationTestCase,
)


if __name__ == "__main__":
    # test groups requested on the command line (default: the only group)
    requested = sys.argv[1:] or ['utf-8_validation']
    # hide the group names from unittest's own argument parsing
    sys.argv = sys.argv[:1]

    unittest.main(exit=False)

    # test data for every known group, keyed by group name
    available = {
        'utf-8_validation': SVP64UTF8ValidationTestCase().test_data,
    }

    # only the requested groups are added to the suite
    suite = unittest.TestSuite()
    suite.addTests(
        TestRunner(data, run_hdl=False)
        for group, data in available.items()
        if group in requested
    )

    unittest.TextTestRunner().run(suite)
# SPDX-License-Identifier: LGPL-3-or-later
# Copyright 2022 Jacob Lifshay

import enum
from openpower.decoder.selectable_int import SelectableInt
from openpower.simulator.program import Program
from openpower.test.common import TestAccumulatorBase
# NOTE(review): the patch hunks skip one line of the target file at this
# point, so this import list may be incomplete.
from openpower.test.state import ExpectedState
from openpower.sv.trans.svp64 import SVP64Asm

# NOTE(review): presumably the address of the input data for the test --
# not referenced by any code visible in this patch; confirm against users.
SVP64_UTF8_VALIDATION_DATA_ADDR = 0x10000


class UTF8FirstTwoBytesError(enum.IntFlag):
    """Flags for the error conditions that can be detected by looking at
    nothing but the first two bytes of a UTF-8 sequence.
    """

    # ascii byte followed by a continuation byte
    TooLong = 0x01

    # leading byte followed by something other than a continuation byte
    TooShort = 0x02

    # value is `< 0x80` but is encoded using 2 bytes
    Overlong2 = 0x04

    # value is a surrogate (`0xD800 <= value <= 0xDFFF`)
    Surrogate = 0x08

    # value is `< 0x800` but is encoded using 3 bytes
    Overlong3 = 0x10

    # value is either:
    # * `< 0x10000` but is encoded using 4 bytes
    # * or `>= 0x140000` with the first continuation byte being `<= 0x8F`
    # The rest of the cases where the value is `> 0x10FFFF` are covered by
    # `TooLarge`.
    Overlong4OrTooLarge = 0x20

    # value is `> 0x10FFFF` with the first continuation byte being `>= 0x90`
    # The rest of the cases where the value is `> 0x10FFFF` are covered by
    # `Overlong4OrTooLarge`.
    TooLarge = 0x40

    # not actually an error -- two continuations in a row
    TwoContinuations = 0x80

    # every flag above except `TwoContinuations`
    AllActualErrors = (
        TooLong | TooShort | Overlong2 | Surrogate
        | Overlong3 | Overlong4OrTooLarge | TooLarge
    )


# Look-up tables for detecting errors in the first two bytes.  The final
# error flags are produced by looking up the nibbles of the first two bytes
# in the matching tables and bitwise ANDing the results together.
# To work out what belongs in a table entry, consider every two-byte
# pattern that could match that entry's comment.
# Short aliases so the table rows below stay compact.
_TLN = UTF8FirstTwoBytesError.TooLong
_TS = UTF8FirstTwoBytesError.TooShort
_O2 = UTF8FirstTwoBytesError.Overlong2
_SG = UTF8FirstTwoBytesError.Surrogate
_O3 = UTF8FirstTwoBytesError.Overlong3
_O4TL = UTF8FirstTwoBytesError.Overlong4OrTooLarge
_TLG = UTF8FirstTwoBytesError.TooLarge
_2C = UTF8FirstTwoBytesError.TwoContinuations

# Indexed by the high nibble of the first byte; the assembly below loads
# this address into r6 as the table base.
FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR = 0xFF00
FIRST_BYTE_HIGH_NIBBLE_LUT = [
    _TLN,  # first 2 bytes are 0x0? 0x??
    _TLN,  # first 2 bytes are 0x1? 0x??
    _TLN,  # first 2 bytes are 0x2? 0x??
    _TLN,  # first 2 bytes are 0x3? 0x??
    _TLN,  # first 2 bytes are 0x4? 0x??
    _TLN,  # first 2 bytes are 0x5? 0x??
    _TLN,  # first 2 bytes are 0x6? 0x??
    _TLN,  # first 2 bytes are 0x7? 0x??
    _2C,  # first 2 bytes are 0x8? 0x??
    _2C,  # first 2 bytes are 0x9? 0x??
    _2C,  # first 2 bytes are 0xA? 0x??
    _2C,  # first 2 bytes are 0xB? 0x??
    _TS | _O2,  # first 2 bytes are 0xC? 0x??
    _TS,  # first 2 bytes are 0xD? 0x??
    _TS | _SG | _O3,  # first 2 bytes are 0xE? 0x??
    _TS | _O4TL | _TLG,  # first 2 bytes are 0xF? 0x??
]

# Indexed by the low nibble of the first byte; the assembly below loads
# this address into r7 as the table base.
FIRST_BYTE_LOW_NIBBLE_LUT_ADDR = 0xFF10
FIRST_BYTE_LOW_NIBBLE_LUT = [
    _TLN | _TS | _O2 | _O3 | _O4TL | _2C,  # first 2 bytes are 0x?0 0x??
    _TLN | _TS | _O2 | _2C,  # first 2 bytes are 0x?1 0x??
    _TLN | _TS | _2C,  # first 2 bytes are 0x?2 0x??
    _TLN | _TS | _2C,  # first 2 bytes are 0x?3 0x??
    _TLN | _TS | _TLG | _2C,  # first 2 bytes are 0x?4 0x??
    _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?5 0x??
    _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?6 0x??
    _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?7 0x??
    _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?8 0x??
    _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?9 0x??
    _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?A 0x??
    _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?B 0x??
    _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?C 0x??
    _TLN | _TS | _SG | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?D 0x??
    _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?E 0x??
    _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?F 0x??
]

# Indexed by the high nibble of the second byte; the assembly below loads
# this address into r8 as the table base.
SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR = 0xFF20
SECOND_BYTE_HIGH_NIBBLE_LUT = [
    _TS,  # first 2 bytes are 0x?? 0x0?
    _TS,  # first 2 bytes are 0x?? 0x1?
    _TS,  # first 2 bytes are 0x?? 0x2?
    _TS,  # first 2 bytes are 0x?? 0x3?
    _TS,  # first 2 bytes are 0x?? 0x4?
    _TS,  # first 2 bytes are 0x?? 0x5?
    _TS,  # first 2 bytes are 0x?? 0x6?
    _TS,  # first 2 bytes are 0x?? 0x7?
    _TLN | _O2 | _O3 | _O4TL | _2C,  # first 2 bytes are 0x?? 0x8?
    _TLN | _O2 | _O3 | _TLG | _2C,  # first 2 bytes are 0x?? 0x9?
    _TLN | _O2 | _SG | _TLG | _2C,  # first 2 bytes are 0x?? 0xA?
    _TLN | _O2 | _SG | _TLG | _2C,  # first 2 bytes are 0x?? 0xB?
    _TS,  # first 2 bytes are 0x?? 0xC?
    _TS,  # first 2 bytes are 0x?? 0xD?
    _TS,  # first 2 bytes are 0x?? 0xE?
    _TS,  # first 2 bytes are 0x?? 0xF?
]


def svp64_utf8_validation_asm():
    """Return the SVP64 assembly listing of the UTF-8 validation routine.

    The routine takes the input address in r3 and the input length in r4,
    works on chunks of up to 16 bytes per loop iteration, and finishes with
    ``blr`` after setting r3 to 1 (input accepted) or 0 (input rejected).
    The three nibble look-up tables are addressed through r6, r7 and r8,
    which the prologue loads from the ``*_LUT_ADDR`` constants.

    Returns:
        list of str, one assembly instruction or label per element.
    """
    # explicit ints, so the f-strings below never depend on how an enum
    # member formats itself
    actual_errors_mask = int(UTF8FirstTwoBytesError.AllActualErrors)
    two_continuations_bit = int(UTF8FirstTwoBytesError.TwoContinuations)
    return [
        # input addr in r3, input length in r4
        # prev bytes in r45-r47 -- u64x3
        # cur bytes in r48-r63 -- u64x16
        # nibbles to look up in r80-r95 -- u64x16
        # error flags in r64-r79 -- u64x16
        # NOTE(review): r96 is used as the or-reduction result but is never
        # cleared, so the first reduction also ORs in whatever r96 held on
        # entry -- confirm callers start with r96 == 0.
        "setvl 0, 0, 3, 0, 1, 1",  # set VL to 3
        # NOTE(review): this clear is overwritten by the copy from r61-r63
        # at the top of `loop`, and r61-r63 are not cleared before the
        # first iteration -- confirm the intended initial state.
        "sv.addi *45, 0, 0",  # clear prev bytes
        # table base addresses: r6, r7, r8
        f"lis 6, {FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}",
        f"ori 6, 6, {FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR & 0xFFFF}",
        f"lis 7, {FIRST_BYTE_LOW_NIBBLE_LUT_ADDR >> 16}",
        f"ori 7, 7, {FIRST_BYTE_LOW_NIBBLE_LUT_ADDR & 0xFFFF}",
        f"lis 8, {SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}",
        f"ori 8, 8, {SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR & 0xFFFF}",
        "loop:",
        "setvl 0, 0, 3, 0, 1, 1",  # set VL to 3
        "sv.ori *45, *61, 0",  # copy prev bytes from end of cur bytes

        # clear cur bytes, so bytes beyond end end up being zeros
        "setvl 0, 0, 16, 0, 1, 1",  # set VL to 16
        "sv.addi *48, 0, 0",  # clear cur bytes
        "setvl. 5, 4, 16, 0, 1, 1",  # set VL to min(16, r4)
        "beq final_check",  # if no bytes left to load, run final check
        "sv.lbz/els *48, 0(3)",  # load bytes
        "setvl 0, 0, 16, 0, 1, 1",  # set VL to 16
        # now we can operate on 16 byte chunks, branch to `fail` if they
        # don't pass validation.

        # get high nibbles of input shifted by 1 byte
        f"sv.rldicl *80, *47, {64 - 4}, 4",  # srdi *80, *47, 4
        # look-up nibbles in table, writing to error flags
        "sv.lbzx *64, 6, *80",

        # get low nibbles of input shifted by 1 byte
        f"sv.andi. *80, *47, {0xF}",  # there is no andi without Rc
        # look-up nibbles in the first-byte low-nibble table (base in r7,
        # not r6 which holds the high-nibble table)
        "sv.lbzx *80, 7, *80",
        # bitwise and into error flags
        "sv.and *64, *64, *80",

        # get high nibbles of input; spelled as rldicl to match the shift
        # above
        f"sv.rldicl *80, *48, {64 - 4}, 4",  # srdi *80, *48, 4
        # look-up nibbles in the second-byte high-nibble table (base in r8)
        "sv.lbzx *80, 8, *80",
        # bitwise and into error flags
        "sv.and *64, *64, *80",

        # or-reduce error flags into r96
        "sv.mv *80, *64",
        "sv.or *81, *80, *81",
        # check for any actual error flags set
        f"sv.andi. 96, 96, {actual_errors_mask}",
        "bne fail",

        # check for the correct number of continuation bytes for 3/4-byte
        # cases
        # set bit 0x80 (TwoContinuations) if input is >= 0xE0
        f"sv.subi/satu *80, *46, {0xE0 - 0x80}",
        # xor into error flags
        "sv.xor *64, *64, *80",
        # set bit 0x80 (TwoContinuations) if input is >= 0xF0
        f"sv.subi/satu *80, *45, {0xF0 - 0x80}",
        # xor into error flags
        "sv.xor *80, *64, *80",
        # now bit 0x80 is set in r80-95 if there's an error
        # or-reduce into r96
        "sv.or *81, *80, *81",
        # adjust count/pointer
        "add 3, 3, 5",  # increment pointer
        "sub 4, 4, 5",  # decrement count
        f"sv.andi. 96, 96, {two_continuations_bit}",  # check if any errors
        "beq loop",  # if no errors loop, else fail
        "fail:",
        "li 3, 0",
        "blr",
        "final_check:",
        # check if prev input is incomplete
        # check if byte 3 bytes from end needed 4 bytes
        f"sv.cmpli 0, 1, 45, {0xF0}",
        "bge fail",
        # check if byte 2 bytes from end needed 3 bytes
        f"sv.cmpli 0, 1, 46, {0xE0}",
        "bge fail",
        # check if byte 1 bytes from end needed 2 bytes
        f"sv.cmpli 0, 1, 47, {0xC0}",
        "bge fail",
        "li 3, 1",
        "blr",
    ]