1 # SPDX-License-Identifier: LGPL-3-or-later
2 # Copyright 2022 Jacob Lifshay
6 from openpower
.decoder
.selectable_int
import SelectableInt
7 from openpower
.simulator
.program
import Program
8 from openpower
.test
.common
import TestAccumulatorBase
, skip_case
9 from openpower
.test
.state
import ExpectedState
10 from openpower
.sv
.trans
.svp64
import SVP64Asm
11 from cached_property
import cached_property
14 SVP64_UTF8_VALIDATION_DATA_ADDR
= 0x10000
17 class UTF8FirstTwoBytesError(enum
.IntFlag
):
18 """ Error conditions that are detectable from just the first two bytes in
23 """ ascii byte followed by a continuation byte """
26 """ leading byte followed by something other than a continuation byte """
29 """ value is `< 0x80` but is encoded using 2 bytes """
32 """ value is a surrogate (`0xD800 <= value <= 0xDFFF`) """
35 """ value is `< 0x800` but is encoded using 3 bytes """
37 Overlong4OrTooLarge
= 1 << 5
39 * `< 0x10000` but is encoded using 4 bytes
40 * or the value is `>= 0x140000` with the first continuation byte
43 The rest of the cases where the value is `> 0x10FFFF` are covered by
48 """ value is `> 0x10FFFF` with the first continuation byte being `>= 0x90`
50 The rest of the cases where the value is `> 0x10FFFF` are covered by
51 `Overlong4OrTooLarge`.
54 TwoContinuations
= 1 << 7
55 """ not actually an error -- two continuations in a row """
57 AllActualErrors
= (TooLong | TooShort | Overlong2 | Surrogate |
58 Overlong3 | Overlong4OrTooLarge | TooLarge
)
# Look-up tables for checking for errors in the first two bytes. The final
# error flags are generated by looking up the nibbles of the first two bytes
# in the appropriate tables and bitwise ANDing the results together.
# To figure out what to put in each entry in the LUTs, collect every error
# flag that could occur for some pair of bytes matching that entry's comment.
# Short module-private aliases for the UTF8FirstTwoBytesError flags; the
# look-up tables OR several of these together per entry, so the short names
# keep those entries compact.
_TLN = UTF8FirstTwoBytesError.TooLong
_TS = UTF8FirstTwoBytesError.TooShort
_O2 = UTF8FirstTwoBytesError.Overlong2
_SG = UTF8FirstTwoBytesError.Surrogate
_O3 = UTF8FirstTwoBytesError.Overlong3
_O4TL = UTF8FirstTwoBytesError.Overlong4OrTooLarge
_TLG = UTF8FirstTwoBytesError.TooLarge
_2C = UTF8FirstTwoBytesError.TwoContinuations
76 FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR
= 0xFF00
77 FIRST_BYTE_HIGH_NIBBLE_LUT
= [
78 _TLN
, # first 2 bytes are 0x0? 0x??
79 _TLN
, # first 2 bytes are 0x1? 0x??
80 _TLN
, # first 2 bytes are 0x2? 0x??
81 _TLN
, # first 2 bytes are 0x3? 0x??
82 _TLN
, # first 2 bytes are 0x4? 0x??
83 _TLN
, # first 2 bytes are 0x5? 0x??
84 _TLN
, # first 2 bytes are 0x6? 0x??
85 _TLN
, # first 2 bytes are 0x7? 0x??
86 _2C
, # first 2 bytes are 0x8? 0x??
87 _2C
, # first 2 bytes are 0x9? 0x??
88 _2C
, # first 2 bytes are 0xA? 0x??
89 _2C
, # first 2 bytes are 0xB? 0x??
90 _TS | _O2
, # first 2 bytes are 0xC? 0x??
91 _TS
, # first 2 bytes are 0xD? 0x??
92 _TS | _SG | _O3
, # first 2 bytes are 0xE? 0x??
93 _TS | _O4TL | _TLG
, # first 2 bytes are 0xF? 0x??
95 FIRST_BYTE_LOW_NIBBLE_LUT_ADDR
= 0xFF10
96 FIRST_BYTE_LOW_NIBBLE_LUT
= [
97 _TLN | _TS | _O2 | _O3 | _O4TL | _2C
, # first 2 bytes are 0x?0 0x??
98 _TLN | _TS | _O2 | _2C
, # first 2 bytes are 0x?1 0x??
99 _TLN | _TS | _2C
, # first 2 bytes are 0x?2 0x??
100 _TLN | _TS | _2C
, # first 2 bytes are 0x?3 0x??
101 _TLN | _TS | _TLG | _2C
, # first 2 bytes are 0x?4 0x??
102 _TLN | _TS | _O4TL | _TLG | _2C
, # first 2 bytes are 0x?5 0x??
103 _TLN | _TS | _O4TL | _TLG | _2C
, # first 2 bytes are 0x?6 0x??
104 _TLN | _TS | _O4TL | _TLG | _2C
, # first 2 bytes are 0x?7 0x??
105 _TLN | _TS | _O4TL | _TLG | _2C
, # first 2 bytes are 0x?8 0x??
106 _TLN | _TS | _O4TL | _TLG | _2C
, # first 2 bytes are 0x?9 0x??
107 _TLN | _TS | _O4TL | _TLG | _2C
, # first 2 bytes are 0x?A 0x??
108 _TLN | _TS | _O4TL | _TLG | _2C
, # first 2 bytes are 0x?B 0x??
109 _TLN | _TS | _O4TL | _TLG | _2C
, # first 2 bytes are 0x?C 0x??
110 _TLN | _TS | _SG | _O4TL | _TLG | _2C
, # first 2 bytes are 0x?D 0x??
111 _TLN | _TS | _O4TL | _TLG | _2C
, # first 2 bytes are 0x?E 0x??
112 _TLN | _TS | _O4TL | _TLG | _2C
, # first 2 bytes are 0x?F 0x??
114 SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR
= 0xFF20
115 SECOND_BYTE_HIGH_NIBBLE_LUT
= [
116 _TS
, # first 2 bytes are 0x?? 0x0?
117 _TS
, # first 2 bytes are 0x?? 0x1?
118 _TS
, # first 2 bytes are 0x?? 0x2?
119 _TS
, # first 2 bytes are 0x?? 0x3?
120 _TS
, # first 2 bytes are 0x?? 0x4?
121 _TS
, # first 2 bytes are 0x?? 0x5?
122 _TS
, # first 2 bytes are 0x?? 0x6?
123 _TS
, # first 2 bytes are 0x?? 0x7?
124 _TLN | _O2 | _O3 | _O4TL | _2C
, # first 2 bytes are 0x?? 0x8?
125 _TLN | _O2 | _O3 | _TLG | _2C
, # first 2 bytes are 0x?? 0x9?
126 _TLN | _O2 | _SG | _TLG | _2C
, # first 2 bytes are 0x?? 0xA?
127 _TLN | _O2 | _SG | _TLG | _2C
, # first 2 bytes are 0x?? 0xB?
128 _TS
, # first 2 bytes are 0x?? 0xC?
129 _TS
, # first 2 bytes are 0x?? 0xD?
130 _TS
, # first 2 bytes are 0x?? 0xE?
131 _TS
, # first 2 bytes are 0x?? 0xF?
135 def svp64_utf8_validation_asm():
136 vec_sz
= 8 # limited by number of CR fields implemented in the simulator
138 # cur bytes in r48-r63 -- u64x16
140 # prev bytes in r45-r47 -- u64x3
142 prev_bytes
= cur_bytes
- prev_bytes_sz
143 # error flags in r56-r63 -- u64x8
144 temp_vec1
= cur_bytes
+ vec_sz
145 # nibbles to look up in r64-r71 -- u64x8
146 temp_vec2
= cur_bytes
+ vec_sz
* 2
147 temp_vec2_end
= temp_vec2
+ vec_sz
def sv_set_0x80_if_ge(out_v, inp_v, temp_s, compare_rhs):
    # type: (int, int, int, int) -> list[str]
    """ emit assembly that sets bit 0x80 of every element of the vector
    at `out_v` exactly when the matching element of the vector at `inp_v`
    is unsigned `>= compare_rhs`.

    requires `0x80 <= compare_rhs <= 0xFF` and input elements in
    `0 <= v <= 0xFF`. the scalar register `temp_s` is overwritten.
    only bit 0x80 of the output is meaningful, the low 7 bits are not.

    how it works: the input is first clamped from below to
    `compare_rhs - 1`, then `0x80 - compare_rhs` is added. elements that
    were `< compare_rhs` end up as exactly 0x7F, everything else lands
    in `0x80 <= v <= 0xFF`.

    can't use CRs for this, since vectors of CRs used as masks currently
    max out at 4 in the simulator.
    """
    assert 0x80 <= compare_rhs <= 0xFF, \
        "the algorithm only works if compare_rhs is in range"
    clamp_floor = compare_rhs - 1
    bias = 0x80 - compare_rhs
    asm = []
    # load the clamp value into the scratch scalar
    asm.append(f"addi {temp_s}, 0, {clamp_floor}")
    # out = max(inp, compare_rhs - 1), unsigned
    asm.append(f"sv.maxu *{out_v}, *{inp_v}, {temp_s}")
    # shift the range so that the threshold lands on 0x80
    asm.append(f"sv.addi *{out_v}, *{out_v}, {bias}")
    return asm
168 # input addr in r3, input length in r4
169 f
"setvl 0, 0, {prev_bytes_sz}, 0, 1, 1", # set VL to prev_bytes_sz
170 # clear what will go into prev bytes
171 f
"sv.addi *{cur_bytes + vec_sz - prev_bytes_sz}, 0, 0",
172 f
"addis 6, 0, {FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}",
173 f
"ori 6, 6, {FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR & 0xFFFF}",
174 f
"addis 7, 0, {FIRST_BYTE_LOW_NIBBLE_LUT_ADDR >> 16}",
175 f
"ori 7, 7, {FIRST_BYTE_LOW_NIBBLE_LUT_ADDR & 0xFFFF}",
176 f
"addis 8, 0, {SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}",
177 f
"ori 8, 8, {SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR & 0xFFFF}",
179 f
"setvl 0, 0, {prev_bytes_sz}, 0, 1, 1", # set VL to prev_bytes_sz
180 # copy prev bytes from end of cur bytes
181 f
"sv.ori *{prev_bytes}, *{cur_bytes + vec_sz - prev_bytes_sz}, 0",
183 # clear cur bytes, so bytes beyond end end up being zeros
184 f
"setvl 0, 0, {vec_sz}, 0, 1, 1", # set VL to vec_sz
185 f
"sv.addi *{cur_bytes}, 0, 0", # clear cur bytes
186 f
"setvl. 5, 4, {vec_sz}, 0, 1, 1", # set VL to min(vec_sz, r4)
187 # if no bytes left to load, run final check
188 f
"bc 12, 2, final_check # beq final_check",
189 # sv.lbz/els is buggy, use sv.lbzx instead:
190 f
"sv.addi *{cur_bytes + 1}, *{cur_bytes}, 1", # create indexes
191 f
"sv.lbzx *{cur_bytes}, {inp_addr}, *{cur_bytes}", # load bytes
192 f
"setvl 0, 0, {vec_sz}, 0, 1, 1", # set VL to vec_sz
193 # now we can operate on vec_sz byte chunks, branch to `fail` if they
194 # don't pass validation.
196 # get high nibbles of input shifted by 1 byte
197 (f
"sv.rldicl *{temp_vec2}, *{cur_bytes - 1}, {64 - 4}, 4"
198 f
" # sv.srdi *{temp_vec2}, *{cur_bytes - 1}, 4"),
199 # look-up nibbles in table, writing to error flags
200 f
"sv.lbzx *{temp_vec1}, 6, *{temp_vec2}",
202 # get low nibbles of input shifted by 1 byte
203 # there is no andi without Rc
204 # sv.andi. with scalars is buggy, so use a temporary and sv.and
206 f
"sv.and *{temp_vec2}, *{cur_bytes - 1}, 9",
207 # look-up nibbles in table
208 f
"sv.lbzx *{temp_vec2}, 7, *{temp_vec2}",
209 # bitwise and into error flags
210 f
"sv.and *{temp_vec1}, *{temp_vec1}, *{temp_vec2}",
212 # get high nibbles of input
213 # srdi *{temp_vec2}, *{cur_bytes}, 4
214 f
"sv.rldicl *{temp_vec2}, *{cur_bytes}, {64 - 4}, 4",
215 # look-up nibbles in table
216 f
"sv.lbzx *{temp_vec2}, 8, *{temp_vec2}",
217 # bitwise and into error flags
218 f
"sv.and *{temp_vec1}, *{temp_vec1}, *{temp_vec2}",
220 # or-reduce error flags into temp_vec2_end
221 f
"sv.addi {temp_vec2_end}, 0, 0",
222 f
"sv.ori *{temp_vec2}, *{temp_vec1}, 0",
223 f
"sv.or *{temp_vec2 + 1}, *{temp_vec2}, *{temp_vec2 + 1}",
224 # check for any actual error flags set
225 # sv.andi. is buggy, so use sv.and, then compare
226 f
"addi 9, 0, {UTF8FirstTwoBytesError.AllActualErrors}",
227 f
"sv.and 9, {temp_vec2_end}, 9",
229 f
"bc 4, 2, fail # bne fail",
231 # check for the correct number of continuation bytes for 3/4-byte cases
233 # set bit 0x80 (TwoContinuations) if input is >= 0xE0
234 *sv_set_0x80_if_ge(out_v
=temp_vec2
, inp_v
=cur_bytes
- 2,
235 temp_s
=9, compare_rhs
=0xE0),
236 # xor into error flags
237 f
"sv.xor *{temp_vec1}, *{temp_vec1}, *{temp_vec2}",
238 # set bit 0x80 (TwoContinuations) if input is >= 0xF0
239 *sv_set_0x80_if_ge(out_v
=temp_vec2
, inp_v
=cur_bytes
- 3,
240 temp_s
=9, compare_rhs
=0xF0),
241 # xor into error flags
242 f
"sv.xor *{temp_vec1}, *{temp_vec1}, *{temp_vec2}",
243 # now bit 0x80 is set in temp_vec1 if there's an error
244 # or-reduce into temp_vec2
245 f
"sv.addi {temp_vec2}, 0, 0",
246 f
"sv.or *{temp_vec1 + 1}, *{temp_vec1}, *{temp_vec1 + 1}",
247 # adjust count/pointer
248 f
"add 3, 3, 5", # increment pointer
249 f
"subf 4, 5, 4", # decrement count
250 # sv.andi. is buggy, so move to r9 first
251 f
"sv.ori 9, {temp_vec2}, 0",
252 f
"andi. 9, 9, {0x80}", # check if any errors
253 f
"bc 12, 2, loop # beq loop", # if no errors loop, else fail
256 f
"bclr 20, 0, 0 # blr",
259 # need to set VL to 1, here
260 # https://bugs.libre-soc.org/show_bug.cgi?id=905
261 # (SVP64Single is planned for accessing high-regnumbers as Scalars)
263 # setting VL=0 (often set dynamically at runtime in Standard Cray
264 # Vectors) is the standard canonical way in Cray Vectors to legitimately
265 # request instructions not to be run at all (nops).
267 # a workaround for not having SVP64Single right now and still get
268 # at the high register numbers (32-127) is to static-set VL=1
269 # the alternative is to move cur_bytes to reg numbers 0-31 but
270 # 16 regs within the range 0-31 is a lot to ask for.
271 f
"setvl 0, 0, 1, 0, 1, 1", # set VL to 1
273 # check if prev input is incomplete
274 # check if byte 3 bytes from end needed 4 bytes
275 f
"sv.cmpli 0, 1, {cur_bytes - 3}, {0xF0}",
276 f
"bc 4, 0, fail # bge fail",
277 # check if byte 2 bytes from end needed 3 bytes
278 f
"sv.cmpli 0, 1, {cur_bytes - 2}, {0xE0}",
279 f
"bc 4, 0, fail # bge fail",
280 # check if byte 1 bytes from end needed 2 bytes
281 f
"sv.cmpli 0, 1, {cur_bytes - 1}, {0xC0}",
282 f
"bc 4, 0, fail # bge fail",
284 f
"bclr 20, 0, 0 # blr",
288 def assemble(instructions
, start_pc
=0):
291 out_instructions
= []
292 for instr
in instructions
:
293 m
= re
.fullmatch(r
" *([a-zA-Z0-9_]+): *(#.*)?", instr
)
297 raise ValueError(f
"label {name!r} defined multiple times")
300 m
= re
.fullmatch(r
" *sv\.[a-zA-Z0-9_].*", instr
)
305 out_instructions
.append((pc
, instr
))
308 for (idx
, (pc
, instr
)) in enumerate(tuple(out_instructions
)):
309 for (label
, target
) in labels
.items():
313 addr
= (target
- pc
+ 4)
316 addr
= (pc
- target
- 4)
319 instr
= instr
.replace(label
, f
"{sign}0x{addr:X}")
321 out_instructions
[idx
] = instr
323 for k
, v
in labels
.items():
324 out_instructions
.append(f
".set {k}, . - 0x{last_pc - v:X} # 0x{v:X}")
326 return Program(list(SVP64Asm(out_instructions
)), 0)
329 class SVP64UTF8ValidationTestCase(TestAccumulatorBase
):
331 self
.__seen
_cases
= set()
336 return assemble(svp64_utf8_validation_asm())
338 def run_case(self
, data
, src_loc_at
=0):
339 # type: (bytes, int) -> None
340 if data
in self
.__seen
_cases
:
342 self
.__seen
_cases
.add(data
)
346 except UnicodeDecodeError:
348 initial_regs
= [0x15cee3293aa9bfbe] * 128 # fill with junk
349 initial_regs
[3] = 0x10000 # pointer to bytes to check
350 initial_regs
[4] = len(data
) # length of bytes to check
353 for i
, v
in enumerate(data
):
354 initial_mem
[i
+ initial_regs
[3]] = v
, 1
355 for i
, v
in enumerate(FIRST_BYTE_LOW_NIBBLE_LUT
):
356 initial_mem
[i
+ FIRST_BYTE_LOW_NIBBLE_LUT_ADDR
] = int(v
), 1
357 for i
, v
in enumerate(FIRST_BYTE_HIGH_NIBBLE_LUT
):
358 initial_mem
[i
+ FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR
] = int(v
), 1
359 for i
, v
in enumerate(SECOND_BYTE_HIGH_NIBBLE_LUT
):
360 initial_mem
[i
+ SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR
] = int(v
), 1
361 stop_at_pc
= 0x10000000
362 initial_sprs
= {8: SelectableInt(stop_at_pc
, 64)}
363 e
= ExpectedState(pc
=stop_at_pc
, int_regs
=4, crregs
=0, fp_regs
=0,
364 so
=None, ov
=None, ca
=None)
365 e
.intregs
[:3] = initial_regs
[:3]
366 e
.intregs
[3] = expected
367 with self
.subTest(data
=data
, expected
=expected
):
368 self
.add_case(self
.program
, initial_regs
, initial_mem
=initial_mem
,
369 initial_sprs
=initial_sprs
, stop_at_pc
=stop_at_pc
,
371 src_loc_at
=src_loc_at
+ 1)
def run_cases(self, data):
    # type: (bytes | str) -> None
    """ register a case for every non-empty suffix and every proper prefix
    (including the empty one) of `data`.

    `str` input is encoded as UTF-8 first. the bytes are then centered
    in a field of 8 bytes, padded with ASCII spaces, before slicing;
    input that is already 8 bytes or longer is left unpadded.
    """
    raw = data.encode("utf-8") if isinstance(data, str) else data
    padded = raw.center(8, b' ')
    for cut in range(len(padded)):
        # suffix first, then the complementary prefix
        for piece in (padded[cut:], padded[:cut]):
            self.run_case(piece, src_loc_at=1)
382 def case_empty(self
):
def case_x6_sp_nul(self):
    # six ASCII spaces followed by a single NUL byte, registered as one
    # case directly (no prefix/suffix expansion)
    payload = b' ' * 6 + b'\x00'
    self.run_case(payload)
389 self
.run_cases("\u0000") # min 1-byte
395 self
.run_cases("\u007F") # max 1-byte
def case_c0_80(self):
    # smallest overlong 2-byte encoding: U+0000 spelled as C0 80
    overlong = b"\xC0\x80"
    self.run_cases(overlong)
def case_c1_bf(self):
    # largest overlong 2-byte encoding: U+007F spelled as C1 BF
    overlong = b"\xC1\xBF"
    self.run_cases(overlong)
def case_u0080(self):
    # smallest code point that needs 2 bytes
    char = "\u0080"
    self.run_cases(char)
def case_u07ff(self):
    # largest code point that fits in 2 bytes
    char = "\u07FF"
    self.run_cases(char)
def case_e0_80_80(self):
    # smallest overlong 3-byte encoding: U+0000 spelled as E0 80 80
    overlong = b"\xE0\x80\x80"
    self.run_cases(overlong)
def case_e0_9f_bf(self):
    # largest overlong 3-byte encoding: U+07FF spelled as E0 9F BF
    overlong = b"\xE0\x9F\xBF"
    self.run_cases(overlong)
def case_u0800(self):
    # smallest code point that needs 3 bytes
    char = "\u0800"
    self.run_cases(char)
def case_u0fff(self):
    # last code point whose encoding starts with E0 (E0 BF BF)
    char = "\u0FFF"
    self.run_cases(char)
def case_u1000(self):
    # first code point whose encoding starts with E1 (E1 80 80)
    char = "\u1000"
    self.run_cases(char)
def case_ucfff(self):
    # last code point whose encoding starts with EC (EC BF BF)
    char = "\uCFFF"
    self.run_cases(char)
def case_ud000(self):
    # first code point whose encoding starts with ED (ED 80 80)
    char = "\uD000"
    self.run_cases(char)
def case_ud7ff(self):
    # last code point before the surrogate range (ED 9F BF)
    char = "\uD7FF"
    self.run_cases(char)
def case_ed_a0_80(self):
    # first high surrogate (0xD800) encoded as if it were a code point
    surrogate = b"\xED\xA0\x80"
    self.run_cases(surrogate)
def case_ed_af_bf(self):
    # last high surrogate (0xDBFF) encoded as if it were a code point
    surrogate = b"\xED\xAF\xBF"
    self.run_cases(surrogate)
def case_ed_b0_80(self):
    # first low surrogate (0xDC00) encoded as if it were a code point
    surrogate = b"\xED\xB0\x80"
    self.run_cases(surrogate)
def case_ed_bf_bf(self):
    # last low surrogate (0xDFFF) encoded as if it were a code point
    surrogate = b"\xED\xBF\xBF"
    self.run_cases(surrogate)
def case_ue000(self):
    # first code point after the surrogate range (EE 80 80)
    char = "\uE000"
    self.run_cases(char)
def case_uffff(self):
    # largest code point that fits in 3 bytes
    char = "\uFFFF"
    self.run_cases(char)
def case_f0_80_80_80(self):
    # smallest overlong 4-byte encoding: U+0000 spelled as F0 80 80 80
    overlong = b"\xF0\x80\x80\x80"
    self.run_cases(overlong)
def case_f0_bf_bf_bf(self):
    # largest overlong 4-byte encoding: U+FFFF spelled as F0 8F BF BF.
    # NOTE: the bytes tested are F0 8F BF BF, not F0 BF BF BF as the
    # method name suggests -- the data is what makes this the max overlong
    # case; the name is kept as-is so the case keeps its identifier.
    overlong = b"\xF0\x8F\xBF\xBF"
    self.run_cases(overlong)
def case_u00010000(self):
    # smallest code point that needs 4 bytes
    char = "\U00010000"
    self.run_cases(char)
def case_u0003ffff(self):
    # last code point whose encoding starts with F0 (F0 BF BF BF)
    char = "\U0003FFFF"
    self.run_cases(char)
def case_u00040000(self):
    # first code point whose encoding starts with F1 (F1 80 80 80)
    char = "\U00040000"
    self.run_cases(char)
def case_u000fffff(self):
    # last code point whose encoding starts with F3 (F3 BF BF BF)
    char = "\U000FFFFF"
    self.run_cases(char)
def case_u00100000(self):
    # first code point whose encoding starts with F4 (F4 80 80 80)
    char = "\U00100000"
    self.run_cases(char)
def case_u0010ffff(self):
    # largest valid code point, and so the largest valid 4-byte encoding
    char = "\U0010FFFF"
    self.run_cases(char)
def case_f4_90_80_80(self):
    # first encoding past the valid range: 0x110000 spelled as F4 90 80 80
    too_big = b"\xF4\x90\x80\x80"
    self.run_cases(too_big)
def case_f7_bf_bf_bf(self):
    # largest value expressible in the 4-byte form, far past 0x10FFFF
    too_big = b"\xF7\xBF\xBF\xBF"
    self.run_cases(too_big)
def case_f8_x4_80(self):
    # smallest too-big 5-byte form: F8 followed by four 80 bytes
    lead = b"\xF8"
    continuations = b"\x80" * 4
    self.run_cases(lead + continuations)
def case_fb_x4_bf(self):
    # largest too-big 5-byte form: FB followed by four BF bytes
    lead = b"\xFB"
    continuations = b"\xBF" * 4
    self.run_cases(lead + continuations)
def case_fc_x5_80(self):
    # smallest too-big 6-byte form: FC followed by five 80 bytes
    lead = b"\xFC"
    continuations = b"\x80" * 5
    self.run_cases(lead + continuations)
def case_fd_x5_bf(self):
    # largest too-big 6-byte form: FD followed by five BF bytes
    lead = b"\xFD"
    continuations = b"\xBF" * 5
    self.run_cases(lead + continuations)
def case_fe_x6_80(self):
    # smallest too-big 7-byte form: FE followed by six 80 bytes
    lead = b"\xFE"
    continuations = b"\x80" * 6
    self.run_cases(lead + continuations)
def case_fe_x6_bf(self):
    # largest too-big 7-byte form: FE followed by six BF bytes
    lead = b"\xFE"
    continuations = b"\xBF" * 6
    self.run_cases(lead + continuations)
def case_ff_x7_80(self):
    # smallest too-big 8-byte form: FF followed by seven 80 bytes
    lead = b"\xFF"
    continuations = b"\x80" * 7
    self.run_cases(lead + continuations)
def case_ff_x7_bf(self):
    # largest too-big 8-byte form: FF followed by seven BF bytes
    lead = b"\xFF"
    continuations = b"\xBF" * 7
    self.run_cases(lead + continuations)