# look up tables for checking for errors in the first two bytes, the final
# error flags are generated by looking up the nibbles of the first two bytes
-# in the appropriate tables, and bitwise ANDing the results together.
+# in the appropriate tables, and bitwise ANDing the results together
+# (XXX TODO: investigate using sv.bc/all instead)
# To figure out what to put in each entry in the LUTs, look for all cases
# that could match the comment.
# nibbles to look up in r64-r71 -- u64x8
temp_vec2 = cur_bytes + vec_sz * 2
temp_vec2_end = temp_vec2 + vec_sz
+ # temp consts 0xF0 0xE0 0xC0 (accessible in scalar range)
+ consts = 24
def sv_set_0x80_if_ge(out_v, inp_v, temp_s, compare_rhs):
# type: (int, int, int, int) -> list[str]
return [
# input addr in r3, input length in r4
f"setvl 0, 0, {prev_bytes_sz}, 0, 1, 1", # set VL to prev_bytes_sz
+ # create 3 consts 0xf0 0xe0 0xc0 (within scalar range, saves instrs):
+ # the lead-byte thresholds for a byte 3, 2, 1 positions from the end
+ # NOTE(review): ori ORs the immediate into the contents of r0 (there is
+ # no "RA=0 means literal zero" rule as with addi) -- confirm r0 is 0 here
+ "ori %d, 0, 0xF0" % (consts),
+ "ori %d, 0, 0xE0" % (consts+1),
+ "ori %d, 0, 0xC0" % (consts+2),
# clear what will go into prev bytes
f"sv.addi *{cur_bytes + vec_sz - prev_bytes_sz}, 0, 0",
f"addis 6, 0, {FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}",
f"sv.and *{temp_vec1}, *{temp_vec1}, *{temp_vec2}",
# or-reduce error flags into temp_vec2_end
+ # XXX can likely use sv.cmpli followed by sv.bc here
f"sv.addi {temp_vec2_end}, 0, 0",
f"sv.ori *{temp_vec2}, *{temp_vec1}, 0",
f"sv.or *{temp_vec2 + 1}, *{temp_vec2}, *{temp_vec2 + 1}",
f"sv.xor *{temp_vec1}, *{temp_vec1}, *{temp_vec2}",
# now bit 0x80 is set in temp_vec1 if there's an error
# or-reduce into temp_vec2
+ # XXX can likely use sv.bc here, just use the Rc-vector directly
f"sv.addi {temp_vec2}, 0, 0",
f"sv.or *{temp_vec1 + 1}, *{temp_vec1}, *{temp_vec1 + 1}",
# adjust count/pointer
f"bclr 20, 0, 0 # blr",
f"final_check:",
- # need to set VL to something non-zero otherwise all our scalar
- # instructions don't run --- I definitely don't like that ... scalar
- # instructions should run regardless of VL.
- f"setvl 0, 0, 1, 0, 1, 1", # set VL to 1
+ # next loop runs on 3 consts to check 3 bytes
+ "setvl 0, 0, 3, 0, 1, 1", # set VL to 3
# check if prev input is incomplete
- # check if byte 3 bytes from end needed 4 bytes
- f"sv.cmpli 0, 1, {cur_bytes - 3}, {0xF0}",
- f"bc 4, 0, fail # bge fail",
- # check if byte 2 bytes from end needed 3 bytes
- f"sv.cmpli 0, 1, {cur_bytes - 2}, {0xE0}",
- f"bc 4, 0, fail # bge fail",
- # check if byte 1 bytes from end needed 2 bytes
- f"sv.cmpli 0, 1, {cur_bytes - 1}, {0xC0}",
- f"bc 4, 0, fail # bge fail",
- f"addi 3, 0, 1",
- f"bclr 20, 0, 0 # blr",
+ # check if byte 3,2,1 bytes from end needed 4,3,2 bytes
+ # NOTE(review): the sv.cmpl operands carry no `*` vector prefix, unlike
+ # the other vector ops in this listing, while the sv.bc below tests a
+ # vector of CR fields -- confirm the compare really steps over all 3
+ # bytes/consts at VL=3 rather than only the first pair
+ "sv.cmpl 0, 1, %d, %d" % (cur_bytes - 3, consts),
+ "sv.bc 4, *0, fail # bge fail",
+ # no incomplete sequence found: return 1 in r3
+ "addi 3, 0, 1",
+ "bclr 20, 0, 0 # blr",
]