From a3f01e744340f8a9b88f7454e75c41878f229740 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 2 Oct 2022 12:27:18 +0100 Subject: [PATCH] TODO notes on using sv.bc in utf8-validator --- .../test/algorithms/svp64_utf_8_validation.py | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/openpower/test/algorithms/svp64_utf_8_validation.py b/src/openpower/test/algorithms/svp64_utf_8_validation.py index afcbde68..fd0c39e0 100644 --- a/src/openpower/test/algorithms/svp64_utf_8_validation.py +++ b/src/openpower/test/algorithms/svp64_utf_8_validation.py @@ -60,7 +60,8 @@ class UTF8FirstTwoBytesError(enum.IntFlag): # look up tables for checking for errors in the first two bytes, the final # error flags are generated by looking up the nibbles of the first two bytes -# in the appropriate tables, and bitwise ANDing the results together. +# in the appropriate tables, and bitwise ANDing the results together +# (XXX TODO: investigate using sv.bc/all instead) # To figure out what to put in each entry in the LUTs, look for all cases # that could match the comment. 
@@ -145,6 +146,8 @@ def svp64_utf8_validation_asm(): # nibbles to look up in r64-r71 -- u64x8 temp_vec2 = cur_bytes + vec_sz * 2 temp_vec2_end = temp_vec2 + vec_sz + # temp consts 0xF0 0xE0 0xC0 (accessible in scalar range) + consts = 24 def sv_set_0x80_if_ge(out_v, inp_v, temp_s, compare_rhs): # type: (int, int, int, int) -> list[str] @@ -167,6 +170,10 @@ def svp64_utf8_validation_asm(): return [ # input addr in r3, input length in r4 f"setvl 0, 0, {prev_bytes_sz}, 0, 1, 1", # set VL to prev_bytes_sz + # create 3 consts 0xf0 0xe0 0xc0 (within scalar range, saves instrs) + "ori %d, 0, 0xF0" % (consts), + "ori %d, 0, 0xE0" % (consts+1), + "ori %d, 0, 0xC0" % (consts+2), # clear what will go into prev bytes f"sv.addi *{cur_bytes + vec_sz - prev_bytes_sz}, 0, 0", f"addis 6, 0, {FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}", @@ -218,6 +225,7 @@ def svp64_utf8_validation_asm(): f"sv.and *{temp_vec1}, *{temp_vec1}, *{temp_vec2}", # or-reduce error flags into temp_vec2_end + # XXX can likely use sv.cmpli followed by sv.bc here f"sv.addi {temp_vec2_end}, 0, 0", f"sv.ori *{temp_vec2}, *{temp_vec1}, 0", f"sv.or *{temp_vec2 + 1}, *{temp_vec2}, *{temp_vec2 + 1}", @@ -242,6 +250,7 @@ def svp64_utf8_validation_asm(): f"sv.xor *{temp_vec1}, *{temp_vec1}, *{temp_vec2}", # now bit 0x80 is set in temp_vec1 if there's an error # or-reduce into temp_vec2 + # XXX can likely use sv.bc here, just use the Rc-vector directly f"sv.addi {temp_vec2}, 0, 0", f"sv.or *{temp_vec1 + 1}, *{temp_vec1}, *{temp_vec1 + 1}", # adjust count/pointer @@ -256,23 +265,15 @@ def svp64_utf8_validation_asm(): f"bclr 20, 0, 0 # blr", f"final_check:", - # need to set VL to something non-zero otherwise all our scalar - # instructions don't run --- I definitely don't like that ... scalar - # instructions should run regardless of VL. 
- f"setvl 0, 0, 1, 0, 1, 1", # set VL to 1 + # next loop runs on 3 consts to check 3 bytes + "setvl 0, 0, 3, 0, 1, 1", # set VL to 3 # check if prev input is incomplete - # check if byte 3 bytes from end needed 4 bytes - f"sv.cmpli 0, 1, {cur_bytes - 3}, {0xF0}", - f"bc 4, 0, fail # bge fail", - # check if byte 2 bytes from end needed 3 bytes - f"sv.cmpli 0, 1, {cur_bytes - 2}, {0xE0}", - f"bc 4, 0, fail # bge fail", - # check if byte 1 bytes from end needed 2 bytes - f"sv.cmpli 0, 1, {cur_bytes - 1}, {0xC0}", - f"bc 4, 0, fail # bge fail", - f"addi 3, 0, 1", - f"bclr 20, 0, 0 # blr", + # check if byte 3,2,1 bytes from end needed 4,3,2 bytes + "sv.cmpl 0, 1, %d, %d" % (cur_bytes - 3, consts), + "sv.bc 4, *0, fail # bge fail", + "addi 3, 0, 1", + "bclr 20, 0, 0 # blr", ] -- 2.30.2