src/openpower/test/algorithms/svp64_utf_8_validation.py

   1 # SPDX-License-Identifier: LGPL-3-or-later
   2 # Copyright 2022 Jacob Lifshay
   3
   4 import enum
   5 import re
   6 from openpower.decoder.selectable_int import SelectableInt
   7 from openpower.simulator.program import Program
   8 from openpower.test.common import TestAccumulatorBase, skip_case
   9 from openpower.test.state import ExpectedState
  10 from openpower.sv.trans.svp64 import SVP64Asm
  11 from cached_property import cached_property
  12
  13
# Address in simulated memory of the input bytes handed to the validation
# program (the test harness passes this same value in r3 as the input
# pointer).
SVP64_UTF8_VALIDATION_DATA_ADDR = 0x10000
  15
  16
  17 class UTF8FirstTwoBytesError(enum.IntFlag):
  18     """ Error conditions that are detectable from just the first two bytes in
  19     a UTF-8 sequence.
  20     """
  21
  22     TooLong = 1 << 0
  23     """ ascii byte followed by a continuation byte """
  24
  25     TooShort = 1 << 1
  26     """ leading byte followed by something other than a continuation byte """
  27
  28     Overlong2 = 1 << 2
  29     """ value is `< 0x80` but is encoded using 2 bytes """
  30
  31     Surrogate = 1 << 3
  32     """ value is a surrogate (`0xD800 <= value <= 0xDFFF`) """
  33
  34     Overlong3 = 1 << 4
  35     """ value is `< 0x800` but is encoded using 3 bytes """
  36
  37     Overlong4OrTooLarge = 1 << 5
  38     """ value is either:
  39         * `< 0x10000` but is encoded using 4 bytes
  40         * or the value is `>= 0x140000` with the first continuation byte
  41             being `<= 0x8F`
  42
  43         The rest of the cases where the value is `> 0x10FFFF` are covered by
  44         `TooLarge`.
  45     """
  46
  47     TooLarge = 1 << 6
  48     """ value is `> 0x10FFFF` with the first continuation byte being `>= 0x90`
  49
  50         The rest of the cases where the value is `> 0x10FFFF` are covered by
  51         `Overlong4OrTooLarge`.
  52     """
  53
  54     TwoContinuations = 1 << 7
  55     """ not actually an error -- two continuations in a row """
  56
  57     AllActualErrors = (TooLong | TooShort | Overlong2 | Surrogate |
  58                        Overlong3 | Overlong4OrTooLarge | TooLarge)
  59
  60
  61 # look up tables for checking for errors in the first two bytes, the final
  62 # error flags are generated by looking up the nibbles of the first two bytes
  63 # in the appropriate tables, and bitwise ANDing the results together.
  64 # To figure out what to put in each entry in the LUTs, look for all cases
  65 # that could match the comment.
  66
  67 _TLN = UTF8FirstTwoBytesError.TooLong
  68 _TS = UTF8FirstTwoBytesError.TooShort
  69 _O2 = UTF8FirstTwoBytesError.Overlong2
  70 _SG = UTF8FirstTwoBytesError.Surrogate
  71 _O3 = UTF8FirstTwoBytesError.Overlong3
  72 _O4TL = UTF8FirstTwoBytesError.Overlong4OrTooLarge
  73 _TLG = UTF8FirstTwoBytesError.TooLarge
  74 _2C = UTF8FirstTwoBytesError.TwoContinuations
  75
  76 FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR = 0xFF00
  77 FIRST_BYTE_HIGH_NIBBLE_LUT = [
  78     _TLN,  # first 2 bytes are 0x0? 0x??
  79     _TLN,  # first 2 bytes are 0x1? 0x??
  80     _TLN,  # first 2 bytes are 0x2? 0x??
  81     _TLN,  # first 2 bytes are 0x3? 0x??
  82     _TLN,  # first 2 bytes are 0x4? 0x??
  83     _TLN,  # first 2 bytes are 0x5? 0x??
  84     _TLN,  # first 2 bytes are 0x6? 0x??
  85     _TLN,  # first 2 bytes are 0x7? 0x??
  86     _2C,  # first 2 bytes are 0x8? 0x??
  87     _2C,  # first 2 bytes are 0x9? 0x??
  88     _2C,  # first 2 bytes are 0xA? 0x??
  89     _2C,  # first 2 bytes are 0xB? 0x??
  90     _TS | _O2,  # first 2 bytes are 0xC? 0x??
  91     _TS,  # first 2 bytes are 0xD? 0x??
  92     _TS | _SG | _O3,  # first 2 bytes are 0xE? 0x??
  93     _TS | _O4TL | _TLG,  # first 2 bytes are 0xF? 0x??
  94 ]
  95 FIRST_BYTE_LOW_NIBBLE_LUT_ADDR = 0xFF10
  96 FIRST_BYTE_LOW_NIBBLE_LUT = [
  97     _TLN | _TS | _O2 | _O3 | _O4TL | _2C,  # first 2 bytes are 0x?0 0x??
  98     _TLN | _TS | _O2 | _2C,  # first 2 bytes are 0x?1 0x??
  99     _TLN | _TS | _2C,  # first 2 bytes are 0x?2 0x??
 100     _TLN | _TS | _2C,  # first 2 bytes are 0x?3 0x??
 101     _TLN | _TS | _TLG | _2C,  # first 2 bytes are 0x?4 0x??
 102     _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?5 0x??
 103     _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?6 0x??
 104     _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?7 0x??
 105     _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?8 0x??
 106     _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?9 0x??
 107     _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?A 0x??
 108     _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?B 0x??
 109     _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?C 0x??
 110     _TLN | _TS | _SG | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?D 0x??
 111     _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?E 0x??
 112     _TLN | _TS | _O4TL | _TLG | _2C,  # first 2 bytes are 0x?F 0x??
 113 ]
 114 SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR = 0xFF20
 115 SECOND_BYTE_HIGH_NIBBLE_LUT = [
 116     _TS,  # first 2 bytes are 0x?? 0x0?
 117     _TS,  # first 2 bytes are 0x?? 0x1?
 118     _TS,  # first 2 bytes are 0x?? 0x2?
 119     _TS,  # first 2 bytes are 0x?? 0x3?
 120     _TS,  # first 2 bytes are 0x?? 0x4?
 121     _TS,  # first 2 bytes are 0x?? 0x5?
 122     _TS,  # first 2 bytes are 0x?? 0x6?
 123     _TS,  # first 2 bytes are 0x?? 0x7?
 124     _TLN | _O2 | _O3 | _O4TL | _2C,  # first 2 bytes are 0x?? 0x8?
 125     _TLN | _O2 | _O3 | _TLG | _2C,  # first 2 bytes are 0x?? 0x9?
 126     _TLN | _O2 | _SG | _TLG | _2C,  # first 2 bytes are 0x?? 0xA?
 127     _TLN | _O2 | _SG | _TLG | _2C,  # first 2 bytes are 0x?? 0xB?
 128     _TS,  # first 2 bytes are 0x?? 0xC?
 129     _TS,  # first 2 bytes are 0x?? 0xD?
 130     _TS,  # first 2 bytes are 0x?? 0xE?
 131     _TS,  # first 2 bytes are 0x?? 0xF?
 132 ]
 133
 134
def svp64_utf8_validation_asm():
    """ Build the SVP64 assembly source for the UTF-8 validation routine.

    Returns a list of strings, each either an instruction or a `name:` label
    line (labels `loop`, `fail` and `final_check` are used as branch targets
    and still need resolving, see `assemble`).

    Register usage of the generated program: input address in r3, input
    length in r4.  The routine returns through `blr` with r3 set to 1 if the
    input passed validation, or 0 if it did not.
    """
    vec_sz = 8  # limited by number of CR fields implemented in the simulator
    inp_addr = 3
    # cur bytes in r48-r55 -- u64x8 (one byte per register, vec_sz of them)
    cur_bytes = 48
    # prev bytes in r45-r47 -- u64x3
    # these sit directly below cur bytes, so a vector starting 1, 2 or 3
    # registers before cur_bytes sees the input shifted by that many bytes
    prev_bytes_sz = 3
    prev_bytes = cur_bytes - prev_bytes_sz
    # error flags in r56-r63 -- u64x8
    temp_vec1 = cur_bytes + vec_sz
    # nibbles to look up in r64-r71 -- u64x8
    temp_vec2 = cur_bytes + vec_sz * 2
    # first register past temp_vec2 (r72), used as the or-reduction result
    temp_vec2_end = temp_vec2 + vec_sz

    def sv_set_0x80_if_ge(out_v, inp_v, temp_s, compare_rhs):
        # type: (int, int, int, int) -> list[str]
        """ generate values with bit 0x80 set if the input vector is
        unsigned `>= compare_rhs`, this assumes `0x80 <= compare_rhs <= 0xFF`
        and the input vector elements are in `0 <= v <= 0xFF`.

        `out_v` and `inp_v` are the first register numbers of the output and
        input vectors, `temp_s` is a scalar register that gets clobbered.
        Returns the list of instructions.

        How it works: `max(v, compare_rhs - 1) + (0x80 - compare_rhs)` is
        exactly `0x7F` when `v < compare_rhs` and lies in `0x80..0xFF`
        otherwise.  Only bit 0x80 of the result is meaningful, the lower
        bits are garbage.

        can't use CRs for this, since vectors of CRs used as masks currently
        max out at 4 in the simulator.
        """
        assert 0x80 <= compare_rhs <= 0xFF, \
            "the algorithm only works if compare_rhs is in range"
        max_arg = compare_rhs - 1
        add_arg = 0x80 - compare_rhs
        return [
            f"addi {temp_s}, 0, {max_arg}",
            f"sv.maxu *{out_v}, *{inp_v}, {temp_s}",
            f"sv.addi *{out_v}, *{out_v}, {add_arg}"
        ]
    return [
        # input addr in r3, input length in r4
        f"setvl 0, 0, {prev_bytes_sz}, 0, 1, 1",  # set VL to prev_bytes_sz
        # clear what will go into prev bytes
        # (the last prev_bytes_sz registers of cur bytes, which the loop
        # copies into prev bytes at the start of every iteration)
        f"sv.addi *{cur_bytes + vec_sz - prev_bytes_sz}, 0, 0",
        # load the three look-up table addresses into r6, r7 and r8
        f"addis 6, 0, {FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}",
        f"ori 6, 6, {FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR & 0xFFFF}",
        f"addis 7, 0, {FIRST_BYTE_LOW_NIBBLE_LUT_ADDR >> 16}",
        f"ori 7, 7, {FIRST_BYTE_LOW_NIBBLE_LUT_ADDR & 0xFFFF}",
        f"addis 8, 0, {SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR >> 16}",
        f"ori 8, 8, {SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR & 0xFFFF}",
        f"loop:",
        f"setvl 0, 0, {prev_bytes_sz}, 0, 1, 1",  # set VL to prev_bytes_sz
        # copy prev bytes from end of cur bytes
        f"sv.ori *{prev_bytes}, *{cur_bytes + vec_sz - prev_bytes_sz}, 0",

        # clear cur bytes, so bytes beyond end end up being zeros
        f"setvl 0, 0, {vec_sz}, 0, 1, 1",  # set VL to vec_sz
        f"sv.addi *{cur_bytes}, 0, 0",  # clear cur bytes
        # the chunk length also lands in r5, used later to advance r3/r4
        f"setvl. 5, 4, {vec_sz}, 0, 1, 1",  # set VL to min(vec_sz, r4)
        # if no bytes left to load, run final check
        f"bc 12, 2, final_check # beq final_check",
        # sv.lbz/els is buggy, use sv.lbzx instead:
        # each element is the previous one plus 1, and element 0 was cleared
        # above, so the registers end up holding 0, 1, 2, ...
        f"sv.addi *{cur_bytes + 1}, *{cur_bytes}, 1",  # create indexes
        f"sv.lbzx *{cur_bytes}, {inp_addr}, *{cur_bytes}",  # load bytes
        f"setvl 0, 0, {vec_sz}, 0, 1, 1",  # set VL to vec_sz
        # now we can operate on vec_sz byte chunks, branch to `fail` if they
        # don't pass validation.

        # get high nibbles of input shifted by 1 byte
        (f"sv.rldicl *{temp_vec2}, *{cur_bytes - 1}, {64 - 4}, 4"
         f" # sv.srdi *{temp_vec2}, *{cur_bytes - 1}, 4"),
        # look-up nibbles in table, writing to error flags
        f"sv.lbzx *{temp_vec1}, 6, *{temp_vec2}",

        # get low nibbles of input shifted by 1 byte
        # there is no andi without Rc
        # sv.andi. with scalars is buggy, so use a temporary and sv.and
        f"addi 9, 0, {0xF}",
        f"sv.and *{temp_vec2}, *{cur_bytes - 1}, 9",
        # look-up nibbles in table
        f"sv.lbzx *{temp_vec2}, 7, *{temp_vec2}",
        # bitwise and into error flags
        f"sv.and *{temp_vec1}, *{temp_vec1}, *{temp_vec2}",

        # get high nibbles of input
        # equivalent to: sv.srdi *{temp_vec2}, *{cur_bytes}, 4
        f"sv.rldicl *{temp_vec2}, *{cur_bytes}, {64 - 4}, 4",
        # look-up nibbles in table
        f"sv.lbzx *{temp_vec2}, 8, *{temp_vec2}",
        # bitwise and into error flags
        f"sv.and *{temp_vec1}, *{temp_vec1}, *{temp_vec2}",

        # or-reduce error flags into temp_vec2_end
        # (copy the flags into temp_vec2, then or each element into the next
        # one; the running result ends up one register past the vector)
        f"sv.addi {temp_vec2_end}, 0, 0",
        f"sv.ori *{temp_vec2}, *{temp_vec1}, 0",
        f"sv.or *{temp_vec2 + 1}, *{temp_vec2}, *{temp_vec2 + 1}",
        # check for any actual error flags set
        # sv.andi. is buggy, so use sv.and, then compare
        f"addi 9, 0, {UTF8FirstTwoBytesError.AllActualErrors}",
        f"sv.and 9, {temp_vec2_end}, 9",
        f"cmpli 0, 1, 9, 0",
        f"bc 4, 2, fail # bne fail",

        # check for the correct number of continuation bytes for 3/4-byte cases

        # set bit 0x80 (TwoContinuations) if input is >= 0xE0
        # (input shifted by 2 bytes: a 3- or 4-byte leader two bytes back
        # requires this byte to be a second continuation)
        *sv_set_0x80_if_ge(out_v=temp_vec2, inp_v=cur_bytes - 2,
                           temp_s=9, compare_rhs=0xE0),
        # xor into error flags
        f"sv.xor *{temp_vec1}, *{temp_vec1}, *{temp_vec2}",
        # set bit 0x80 (TwoContinuations) if input is >= 0xF0
        # (input shifted by 3 bytes: a 4-byte leader three bytes back)
        *sv_set_0x80_if_ge(out_v=temp_vec2, inp_v=cur_bytes - 3,
                           temp_s=9, compare_rhs=0xF0),
        # xor into error flags
        f"sv.xor *{temp_vec1}, *{temp_vec1}, *{temp_vec2}",
        # now bit 0x80 is set in temp_vec1 if there's an error
        # (the lower bits of temp_vec1 are garbage by now and are ignored)
        # or-reduce into temp_vec2
        # (temp_vec2's first register directly follows temp_vec1, so the
        # running or ends up there)
        f"sv.addi {temp_vec2}, 0, 0",
        f"sv.or *{temp_vec1 + 1}, *{temp_vec1}, *{temp_vec1 + 1}",
        # adjust count/pointer
        f"add 3, 3, 5",  # increment pointer
        f"subf 4, 5, 4",  # decrement count
        # sv.andi. is buggy, so move to r9 first
        f"sv.ori 9, {temp_vec2}, 0",
        f"andi. 9, 9, {0x80}",  # check if any errors
        f"bc 12, 2, loop # beq loop",  # if no errors loop, else fail
        f"fail:",
        f"addi 3, 0, 0",  # return 0: input is not valid
        f"bclr 20, 0, 0 # blr",
        f"final_check:",

        # need to set VL to 1, here
        # https://bugs.libre-soc.org/show_bug.cgi?id=905
        # (SVP64Single is planned for accessing high-regnumbers as Scalars)
        #
        # setting VL=0 (often set dynamically at runtime in Standard Cray
        # Vectors) is the standard canonical way in Cray Vectors to legitimately
        # request instructions not to be run at all (nops).
        #
        # a workaround for not having SVP64Single right now and still get
        # at the high register numbers (32-127) is to static-set VL=1
        # the alternative is to move cur_bytes to reg numbers 0-31 but
        # 16 regs within the range 0-31 is a lot to ask for.
        f"setvl 0, 0, 1, 0, 1, 1",  # set VL to 1

        # check if prev input is incomplete
        # check if byte 3 bytes from end needed 4 bytes
        f"sv.cmpli 0, 1, {cur_bytes - 3}, {0xF0}",
        f"bc 4, 0, fail # bge fail",
        # check if byte 2 bytes from end needed 3 bytes
        f"sv.cmpli 0, 1, {cur_bytes - 2}, {0xE0}",
        f"bc 4, 0, fail # bge fail",
        # check if byte 1 bytes from end needed 2 bytes
        f"sv.cmpli 0, 1, {cur_bytes - 1}, {0xC0}",
        f"bc 4, 0, fail # bge fail",
        f"addi 3, 0, 1",  # return 1: input is valid
        f"bclr 20, 0, 0 # blr",
    ]
 286
 287
 288 def assemble(instructions, start_pc=0):
 289     pc = start_pc
 290     labels = {}
 291     out_instructions = []
 292     for instr in instructions:
 293         m = re.fullmatch(r" *([a-zA-Z0-9_]+): *(#.*)?", instr)
 294         if m is not None:
 295             name = m.group(1)
 296             if name in labels:
 297                 raise ValueError(f"label {name!r} defined multiple times")
 298             labels[name] = pc
 299             continue
 300         m = re.fullmatch(r" *sv\.[a-zA-Z0-9_].*", instr)
 301         if m is not None:
 302             pc += 8
 303         else:
 304             pc += 4
 305         out_instructions.append((pc, instr))
 306     last_pc = pc
 307
 308     for (idx, (pc, instr)) in enumerate(tuple(out_instructions)):
 309         for (label, target) in labels.items():
 310             if label in instr:
 311                 if pc < target:
 312                     sign = ""
 313                     addr = (target - pc + 4)
 314                 else:
 315                     sign = "-"
 316                     addr = (pc - target - 4)
 317
 318                 origin = instr
 319                 instr = instr.replace(label, f"{sign}0x{addr:X}")
 320                 break
 321         out_instructions[idx] = instr
 322
 323     for k, v in labels.items():
 324         out_instructions.append(f".set {k}, . - 0x{last_pc - v:X} # 0x{v:X}")
 325
 326     return Program(list(SVP64Asm(out_instructions)), 0)
 327
 328
 329 class SVP64UTF8ValidationTestCase(TestAccumulatorBase):
 330     def __init__(self):
 331         self.__seen_cases = set()
 332         super().__init__()
 333
 334     @cached_property
 335     def program(self):
 336         return assemble(svp64_utf8_validation_asm())
 337
 338     def run_case(self, data, src_loc_at=0):
 339         # type: (bytes, int) -> None
 340         if data in self.__seen_cases:
 341             return
 342         self.__seen_cases.add(data)
 343         expected = 1
 344         try:
 345             data.decode("utf-8")
 346         except UnicodeDecodeError:
 347             expected = 0
 348         initial_regs = [0x15cee3293aa9bfbe] * 128  # fill with junk
 349         initial_regs[3] = 0x10000  # pointer to bytes to check
 350         initial_regs[4] = len(data)  # length of bytes to check
 351
 352         initial_mem = {}
 353         for i, v in enumerate(data):
 354             initial_mem[i + initial_regs[3]] = v, 1
 355         for i, v in enumerate(FIRST_BYTE_LOW_NIBBLE_LUT):
 356             initial_mem[i + FIRST_BYTE_LOW_NIBBLE_LUT_ADDR] = int(v), 1
 357         for i, v in enumerate(FIRST_BYTE_HIGH_NIBBLE_LUT):
 358             initial_mem[i + FIRST_BYTE_HIGH_NIBBLE_LUT_ADDR] = int(v), 1
 359         for i, v in enumerate(SECOND_BYTE_HIGH_NIBBLE_LUT):
 360             initial_mem[i + SECOND_BYTE_HIGH_NIBBLE_LUT_ADDR] = int(v), 1
 361         stop_at_pc = 0x10000000
 362         initial_sprs = {8: SelectableInt(stop_at_pc, 64)}
 363         e = ExpectedState(pc=stop_at_pc, int_regs=4, crregs=0, fp_regs=0,
 364                           so=None, ov=None, ca=None)
 365         e.intregs[:3] = initial_regs[:3]
 366         e.intregs[3] = expected
 367         with self.subTest(data=data, expected=expected):
 368             self.add_case(self.program, initial_regs, initial_mem=initial_mem,
 369                           initial_sprs=initial_sprs, stop_at_pc=stop_at_pc,
 370                           expected=e,
 371                           src_loc_at=src_loc_at + 1)
 372
 373     def run_cases(self, data):
 374         # type: (bytes | str) -> None
 375         if isinstance(data, str):
 376             data = data.encode("utf-8")
 377         data = data.center(8, b' ')
 378         for i in range(len(data)):
 379             self.run_case(data[i:], src_loc_at=1)
 380             self.run_case(data[:i], src_loc_at=1)
 381
 382     def case_empty(self):
 383         self.run_case(b"")
 384
 385     def case_x6_sp_nul(self):
 386         self.run_case(b' ' * 6 + b'\x00')
 387
 388     def case_nul(self):
 389         self.run_cases("\u0000")  # min 1-byte
 390
 391     def case_a(self):
 392         self.run_cases("a")
 393
 394     def case_7f(self):
 395         self.run_cases("\u007F")  # max 1-byte
 396
 397     def case_c0_80(self):
 398         self.run_cases(b"\xC0\x80")  # min 2-byte overlong encoding
 399
 400     def case_c1_bf(self):
 401         self.run_cases(b"\xC1\xBF")  # max 2-byte overlong encoding
 402
 403     def case_u0080(self):
 404         self.run_cases("\u0080")  # min 2-byte
 405
 406     def case_u07ff(self):
 407         self.run_cases("\u07FF")  # max 2-byte
 408
 409     def case_e0_80_80(self):
 410         self.run_cases(b"\xE0\x80\x80")  # min 3-byte overlong encoding
 411
 412     def case_e0_9f_bf(self):
 413         self.run_cases(b"\xE0\x9F\xBF")  # max 3-byte overlong encoding
 414
 415     def case_u0800(self):
 416         self.run_cases("\u0800")  # min 3-byte
 417
 418     def case_u0fff(self):
 419         self.run_cases("\u0FFF")
 420
 421     def case_u1000(self):
 422         self.run_cases("\u1000")
 423
 424     def case_ucfff(self):
 425         self.run_cases("\uCFFF")
 426
 427     def case_ud000(self):
 428         self.run_cases("\uD000")
 429
 430     def case_ud7ff(self):
 431         self.run_cases("\uD7FF")
 432
 433     def case_ed_a0_80(self):
 434         self.run_cases(b"\xED\xA0\x80")  # first high surrogate
 435
 436     def case_ed_af_bf(self):
 437         self.run_cases(b"\xED\xAF\xBF")  # last high surrogate
 438
 439     def case_ed_b0_80(self):
 440         self.run_cases(b"\xED\xB0\x80")  # first low surrogate
 441
 442     def case_ed_bf_bf(self):
 443         self.run_cases(b"\xED\xBF\xBF")  # last low surrogate
 444
 445     def case_ue000(self):
 446         self.run_cases("\uE000")
 447
 448     def case_uffff(self):
 449         self.run_cases("\uFFFF")  # max 3-byte
 450
 451     def case_f0_80_80_80(self):
 452         self.run_cases(b"\xF0\x80\x80\x80")  # min 4-byte overlong encoding
 453
 454     def case_f0_bf_bf_bf(self):
 455         self.run_cases(b"\xF0\x8F\xBF\xBF")  # max 4-byte overlong encoding
 456
 457     def case_u00010000(self):
 458         self.run_cases("\U00010000")  # min 4-byte
 459
 460     def case_u0003ffff(self):
 461         self.run_cases("\U0003FFFF")
 462
 463     def case_u00040000(self):
 464         self.run_cases("\U00040000")
 465
 466     def case_u000fffff(self):
 467         self.run_cases("\U000FFFFF")
 468
 469     def case_u00100000(self):
 470         self.run_cases("\U00100000")
 471
 472     def case_u0010ffff(self):
 473         self.run_cases("\U0010FFFF")  # max 4-byte
 474
 475     def case_f4_90_80_80(self):
 476         self.run_cases(b"\xF4\x90\x80\x80")  # first too-big encoding
 477
 478     def case_f7_bf_bf_bf(self):
 479         self.run_cases(b"\xF7\xBF\xBF\xBF")  # max too-big 4-byte encoding
 480
 481     def case_f8_x4_80(self):
 482         self.run_cases(b"\xF8" + b"\x80" * 4)  # min too-big 5-byte encoding
 483
 484     def case_fb_x4_bf(self):
 485         self.run_cases(b"\xFB" + b"\xBF" * 4)  # max too-big 5-byte encoding
 486
 487     def case_fc_x5_80(self):
 488         self.run_cases(b"\xFC" + b"\x80" * 5)  # min too-big 6-byte encoding
 489
 490     def case_fd_x5_bf(self):
 491         self.run_cases(b"\xFD" + b"\xBF" * 5)  # max too-big 6-byte encoding
 492
 493     def case_fe_x6_80(self):
 494         self.run_cases(b"\xFE" + b"\x80" * 6)  # min too-big 7-byte encoding
 495
 496     def case_fe_x6_bf(self):
 497         self.run_cases(b"\xFE" + b"\xBF" * 6)  # max too-big 7-byte encoding
 498
 499     def case_ff_x7_80(self):
 500         self.run_cases(b"\xFF" + b"\x80" * 7)  # min too-big 8-byte encoding
 501
 502     def case_ff_x7_bf(self):
 503         self.run_cases(b"\xFF" + b"\xBF" * 7)  # max too-big 8-byte encoding