[[!tag standards]]

# summary

minor opcode allocation

| 28.30 |31| name      |
| ----- |--| --------- |
| 00    |Rc| ternaryi  |
| 001   |Rc| ternary   |
| 010   |Rc| bitmask   |
| 011   |Rc| gf*       |
| 101   |1 | ternaryv  |
| 101   |0 | ternarycr |
| 110   |1 | 1/2-op    |
| 111   |Rc| reserved  |

1-op and variants

| dest | src1 | subop | op       |
| ---- | ---- | ----- | -------- |
| RT   | RA   | ..    | bmatflip |

2-op and variants

| dest | src1 | src2 | subop | op       |
| ---- | ---- | ---- | ----- | -------- |
| RT   | RA   | RB   | or    | bmatflip |
| RT   | RA   | RB   | xor   | bmatflip |
| RT   | RA   | RB   | bdep  | dep/ext  |
| RT   | RA   | RB   | bext  | dep/ext  |
| RT   | RA   | RB   |       | grev     |
| RT   | RA   | RB   |       | gorc     |
| RT   | RA   | RB   | shuf  | shuffle  |
| RT   | RA   | RB   | unshuf| shuffle  |
| RT   | RA   | RB   | width | xperm    |
| RT   | RA   | RB   | type  | minmax   |
| RT   | RA   | RB   |       |          |
| RT   | RA   | RB   |       |          |
| RT   | RA   | RB   |       |          |

3 ops

* bitmask set/extract
* ternary bitops
* GF

| 0.5|6.10|11.15|16.20|21..25 | 26....30 |31| name     |
| -- | -- | --- | --- | ----- | -------- |--| -------- |
| NN | RT | RA  | RB  | RC    | mode 001 |Rc| ternary  |
| NN | RT | RA  | RB  | im0-4 | im5-7 00 |Rc| ternaryi |
| NN | RS | RA  | RB  | deg   | 00 011   |Rc| gfmul    |
| NN | RS | RA  | RB  | deg   | 01 011   |Rc| gfadd    |
| NN | RT | RA  | RB  | deg   | 10 011   |Rc| gfinv    |
| NN | RS | RA  | RB  | deg   | 11 011   |Rc| gf rsvd  |

| 0.5|6.10|11.15| 16.23 |24.27 | 28.30 |31| name     |
| -- | -- | --- | ----- | ---- | ----- |--| -------- |
| NN | RT | RA  | imm   | mask | 101   |1 | ternaryv |

| 0.5|6.8 | 9.11|12.14|15|16.23|24.27 | 28.30|31| name      |
| -- | -- | --- | --- |- |-----|----- | -----|--| --------- |
| NN | BA | BB  | BC  |0 |imm  | mask | 101  |0 | ternarycr |

ops

| 0.5|6.10|11.15|16.20| 21.22 | 23 | 24....30 |31| name    |
| -- | -- | --- | --- | ----- | -- | -------- |--| ------- |
| NN | RA | RB  |     |       | 0  | 0000 110 |Rc| rsvd    |
| NN | RA | RB  | RC  | itype | 1  | 0000 110 |Rc| xperm   |
| NN | RA | RB  | RC  | itype | 0  | 0100 110 |Rc| minmax  |
| NN | RA | RB  |     |       | 1  | 0100 110 |Rc| rsvd    |
| NN | RA | RB  | sh  | itype | SH | 1000 110 |Rc| bmopsi  |
| NN | RA | RB  |     |       |    | 1100 110 |Rc| rsvd    |
| NN | RA | RB  |     |       | 0  | 0001 110 |Rc| rsvd    |
| NN | RA | RB  |     |       | 0  | 0101 110 |Rc| rsvd    |
| NN | RA | RB  | RC  | 00    | 0  | 0010 110 |Rc| gorc    |
| NN | RA | RB  | sh  | 00    | SH | 1010 110 |Rc| gorci   |
| NN | RA | RB  | RC  | 00    | 0  | 0110 110 |Rc| gorcw   |
| NN | RA | RB  | sh  | 00    | 0  | 1110 110 |Rc| gorcwi  |
| NN | RA | RB  | RC  | 00    | 1  | 1110 110 |Rc| bmator  |
| NN | RA | RB  | RC  | 01    | 0  | 0010 110 |Rc| grev    |
| NN | RA | RB  | sh  | 01    | SH | 1010 110 |Rc| grevi   |
| NN | RA | RB  | RC  | 01    | 0  | 0110 110 |Rc| grevw   |
| NN | RA | RB  | sh  | 01    | 0  | 1110 110 |Rc| grevwi  |
| NN | RA | RB  | RC  | 01    | 1  | 1110 110 |Rc| bmatxor |
| NN | RA | RB  | RC  | 10    | 0  | 0010 110 |Rc| shfl    |
| NN | RA | RB  | sh  | 10    | SH | 1010 110 |Rc| shfli   |
| NN | RA | RB  | RC  | 10    | 0  | 0110 110 |Rc| shflw   |
| NN | RA | RB  | RC  | 10    | 0  | 1110 110 |Rc| bdep    |
| NN | RA | RB  | RC  | 10    | 1  | 1110 110 |Rc| bext    |
| NN | RA | RB  |     | 11    |    | 1110 110 |Rc| rsvd    |
| NN | RA | RB  |     |       |    | NN11 110 |Rc| rsvd    |

# bit to byte permute

similar to matrix permute in RV bitmanip, which has XOR and OR variants

    do j = 0 to 7
      do k = 0 to 7
         b = VSR[VRB+32].dword[i].byte[k].bit[j]
         VSR[VRT+32].dword[i].byte[j].bit[k] = b
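
a hypothetical scalar sketch in C of one doubleword of the above (the vector
form repeats this per dword).  LSB-0 bit numbering is used here for
illustration; the result is the same 8x8 bit-matrix transpose as the VSX
pseudocode, just expressed in the other numbering convention.

```
#include <stdint.h>

uint64_t bit2byte_permute(uint64_t vrb_dword)
{
    uint64_t vrt = 0;
    for (int j = 0; j < 8; j++)
        for (int k = 0; k < 8; k++) {
            uint64_t b = (vrb_dword >> (k * 8 + j)) & 1;  // byte[k].bit[j]
            vrt |= b << (j * 8 + k);                      // byte[j].bit[k]
        }
    return vrt;
}
```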

# vector bit deposit

vpdepd VRT,VRA,VRB, identical to RV bitmanip bdep

    m = 0
    k = 0
    do while(m < 64)
        if VSR[VRB+32].dword[i].bit[63-m]=1 then do
            result = VSR[VRA+32].dword[i].bit[63-k]
            VSR[VRT+32].dword[i].bit[63-m] = result
            k = k + 1
        m = m + 1
```
uint_xlen_t bdep(uint_xlen_t RA, uint_xlen_t RB)
{
    uint_xlen_t r = 0;
    for (int i = 0, j = 0; i < XLEN; i++)
        if ((RB >> i) & 1) {
            if ((RA >> j) & 1)
                r |= uint_xlen_t(1) << i;
            j++;
        }
    return r;
}
```

# vector bit extract

other way round: identical to RV bext

```
uint_xlen_t bext(uint_xlen_t RA, uint_xlen_t RB)
{
    uint_xlen_t r = 0;
    for (int i = 0, j = 0; i < XLEN; i++)
        if ((RB >> i) & 1) {
            if ((RA >> i) & 1)
                r |= uint_xlen_t(1) << j;
            j++;
        }
    return r;
}
```

# int min/max

signed and unsigned min/max for integer.  this is sort-of partly
synthesiseable in [[sv/svp64]] with pred-result, as long as the dest reg is
one of the sources, but not both signed and unsigned.  when the dest is also
one of the sources and the mv fails due to the CR bit-test failing, this will
only overwrite the dest where the src is greater (or less).

dedicated signed/unsigned min/max therefore gives more flexibility.
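
a minimal sketch in C of the four candidate operations (signed/unsigned min
and max); the mapping of the 2-bit "itype" field onto them is an assumption
here, not something decided by the tables above:

```
#include <stdint.h>

typedef uint64_t uint_xlen_t;   /* XLEN = 64 assumed */
typedef int64_t  int_xlen_t;

uint_xlen_t minmax(uint_xlen_t RA, uint_xlen_t RB, int itype)
{
    int_xlen_t as = (int_xlen_t)RA, bs = (int_xlen_t)RB;
    switch (itype & 3) {
    case 0:  return as < bs ? RA : RB;  // signed min
    case 1:  return as > bs ? RA : RB;  // signed max
    case 2:  return RA < RB ? RA : RB;  // unsigned min
    default: return RA > RB ? RA : RB;  // unsigned max
    }
}
```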

# ternary bitops

Similar to FPGA LUTs: for every bit, perform a lookup into a table using an
8-bit immediate, or another register.

| 0.5|6.10|11.15|16.20| 21..25| 26..30   |31|
| -- | -- | --- | --- | ----- | -------- |--|
| NN | RT | RA  | RB  | im0-4 | im5-7 00 |Rc|

    for i in range(64):
        idx = RT[i] << 2 | RA[i] << 1 | RB[i]
        RT[i] = (imm & (1<<idx)) != 0
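
the same lookup expressed in C, in the style of the other reference code in
this document (a sketch: field names follow the pseudocode above, and the
result is returned rather than overwriting RT):

```
#include <stdint.h>

typedef uint64_t uint_xlen_t;   /* XLEN = 64 assumed */

uint_xlen_t ternaryi(uint_xlen_t RT, uint_xlen_t RA, uint_xlen_t RB,
                     uint8_t imm)
{
    uint_xlen_t r = 0;
    for (int i = 0; i < 64; i++) {
        // bit i of RT, RA, RB forms a 3-bit index into the 8-bit immediate
        int idx = (((RT >> i) & 1) << 2) |
                  (((RA >> i) & 1) << 1) |
                   ((RB >> i) & 1);
        if ((imm >> idx) & 1)
            r |= (uint_xlen_t)1 << i;
    }
    return r;
}
```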

bits 21..22 may be used to specify a mode, such as treating the whole
integer as zero/nonzero and putting 1/0 in the result, rather than testing
bitwise.

a 4-operand variant which becomes more along the lines of an FPGA:

| 0.5|6.10|11.15|16.20|21.25| 26...30  |31|
| -- | -- | --- | --- | --- | -------- |--|
| NN | RT | RA  | RB  | RC  | mode 001 |Rc|

    for i in range(64):
        idx = RT[i] << 2 | RA[i] << 1 | RB[i]
        RT[i] = (RC & (1<<idx)) != 0

the 2-bit mode field may be used to invert the ordering (similar to
carryless mul), giving 3 modes.

also, another possible variant involving swizzle and vec4:

| 0.5|6.10|11.15| 16.23 |24.27 | 28.30 |31|
| -- | -- | --- | ----- | ---- | ----- |--|
| NN | RT | RA  | imm   | mask | 101   |1 |

    for i in range(8):
        idx = RA.x[i] << 2 | RA.y[i] << 1 | RA.z[i]
        res = (imm & (1<<idx)) != 0
        for j in range(3):
            if mask[j]: RT[i+j*8] = res

another mode selection would be to operate on CRs rather than Ints.

| 0.5|6.8 | 9.11|12.14|15|16.23|24.27 | 28.30|31|
| -- | -- | --- | --- |- |-----|----- | -----|--|
| NN | BA | BB  | BC  |0 |imm  | mask | 101  |0 |

    for i in range(4):
        if not mask[i]: continue
        idx = (crregs[BA][i] << 2) | (crregs[BB][i] << 1) | crregs[BC][i]
        crregs[BA][i] = (imm & (1<<idx)) != 0

# bitmask set

based on RV bitmanip singlebit set, instruction format similar to shift
[[isa/fixedshift]].  bmext is actually covered already (shift-with-mask).
however bitmask-invert is not, and set/clr are not covered either, although
they can use the same Shift ALU.

| 0.5|6.10|11.15|16.20|21.25| 26..30   |31|
| -- | -- | --- | --- | --- | -------- |--|
| NN | RT | RA  | RB  | RC  | mode 010 |Rc|

```
uint_xlen_t bmset(uint_xlen_t RA, uint_xlen_t RB, int sh)
{
    int shamt = RB & (XLEN - 1);
    uint_xlen_t mask = (2<<sh)-1;
    return RA | (mask << shamt);
}

uint_xlen_t bmclr(uint_xlen_t RA, uint_xlen_t RB, int sh)
{
    int shamt = RB & (XLEN - 1);
    uint_xlen_t mask = (2<<sh)-1;
    return RA & ~(mask << shamt);
}

uint_xlen_t bminv(uint_xlen_t RA, uint_xlen_t RB, int sh)
{
    int shamt = RB & (XLEN - 1);
    uint_xlen_t mask = (2<<sh)-1;
    return RA ^ (mask << shamt);
}

uint_xlen_t bmext(uint_xlen_t RA, uint_xlen_t RB, int sh)
{
    int shamt = RB & (XLEN - 1);
    uint_xlen_t mask = (2<<sh)-1;
    return mask & (RA >> shamt);
}
```

# grev

based on RV bitmanip

```
uint64_t grev64(uint64_t RA, uint64_t RB)
{
    uint64_t x = RA;
    int shamt = RB & 63;
    if (shamt & 1)  x = ((x & 0x5555555555555555LL) << 1)  |
                        ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
    if (shamt & 2)  x = ((x & 0x3333333333333333LL) << 2)  |
                        ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
    if (shamt & 4)  x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4)  |
                        ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
    if (shamt & 8)  x = ((x & 0x00FF00FF00FF00FFLL) << 8)  |
                        ((x & 0xFF00FF00FF00FF00LL) >> 8);
    if (shamt & 16) x = ((x & 0x0000FFFF0000FFFFLL) << 16) |
                        ((x & 0xFFFF0000FFFF0000LL) >> 16);
    if (shamt & 32) x = ((x & 0x00000000FFFFFFFFLL) << 32) |
                        ((x & 0xFFFFFFFF00000000LL) >> 32);
    return x;
}
```
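
as an illustration (these helper names are not part of the spec), particular
shift amounts reproduce familiar operations:

```
uint64_t bitreverse64(uint64_t x) { return grev64(x, 63); } // reverse all bits
uint64_t byteswap64(uint64_t x)   { return grev64(x, 56); } // reverse byte order
uint64_t brev8(uint64_t x)        { return grev64(x, 7);  } // reverse bits within each byte
```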

# shuffle / unshuffle

based on RV bitmanip

```
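// shuffle32_stage is called by shfl32/unshfl32 below but was not included
// in this excerpt; it is assumed to mirror shuffle64_stage (further down)
uint32_t shuffle32_stage(uint32_t src, uint32_t maskL, uint32_t maskR, int N)
{
    uint32_t x = src & ~(maskL | maskR);
    x |= ((src << N) & maskL) | ((src >> N) & maskR);
    return x;
}
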
uint32_t shfl32(uint32_t RA, uint32_t RB)
{
    uint32_t x = RA;
    int shamt = RB & 15;
    if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
    if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
    if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2);
    if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
    return x;
}

uint32_t unshfl32(uint32_t RA, uint32_t RB)
{
    uint32_t x = RA;
    int shamt = RB & 15;
    if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
    if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2);
    if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
    if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
    return x;
}

uint64_t shuffle64_stage(uint64_t src, uint64_t maskL, uint64_t maskR, int N)
{
    uint64_t x = src & ~(maskL | maskR);
    x |= ((src << N) & maskL) | ((src >> N) & maskR);
    return x;
}

uint64_t shfl64(uint64_t RA, uint64_t RB)
{
    uint64_t x = RA;
    int shamt = RB & 31;
    if (shamt & 16) x = shuffle64_stage(x, 0x0000ffff00000000LL,
                                           0x00000000ffff0000LL, 16);
    if (shamt & 8)  x = shuffle64_stage(x, 0x00ff000000ff0000LL,
                                           0x0000ff000000ff00LL, 8);
    if (shamt & 4)  x = shuffle64_stage(x, 0x0f000f000f000f00LL,
                                           0x00f000f000f000f0LL, 4);
    if (shamt & 2)  x = shuffle64_stage(x, 0x3030303030303030LL,
                                           0x0c0c0c0c0c0c0c0cLL, 2);
    if (shamt & 1)  x = shuffle64_stage(x, 0x4444444444444444LL,
                                           0x2222222222222222LL, 1);
    return x;
}

uint64_t unshfl64(uint64_t RA, uint64_t RB)
{
    uint64_t x = RA;
    int shamt = RB & 31;
    if (shamt & 1)  x = shuffle64_stage(x, 0x4444444444444444LL,
                                           0x2222222222222222LL, 1);
    if (shamt & 2)  x = shuffle64_stage(x, 0x3030303030303030LL,
                                           0x0c0c0c0c0c0c0c0cLL, 2);
    if (shamt & 4)  x = shuffle64_stage(x, 0x0f000f000f000f00LL,
                                           0x00f000f000f000f0LL, 4);
    if (shamt & 8)  x = shuffle64_stage(x, 0x00ff000000ff0000LL,
                                           0x0000ff000000ff00LL, 8);
    if (shamt & 16) x = shuffle64_stage(x, 0x0000ffff00000000LL,
                                           0x00000000ffff0000LL, 16);
    return x;
}
```

# xperm

based on RV bitmanip

```
uint_xlen_t xperm(uint_xlen_t RA, uint_xlen_t RB, int sz_log2)
{
    uint_xlen_t r = 0;
    uint_xlen_t sz = 1LL << sz_log2;
    uint_xlen_t mask = (1LL << sz) - 1;
    for (int i = 0; i < XLEN; i += sz) {
        uint_xlen_t pos = ((RB >> i) & mask) << sz_log2;
        if (pos < XLEN)
            r |= ((RA >> pos) & mask) << i;
    }
    return r;
}

uint_xlen_t xperm_n (uint_xlen_t RA, uint_xlen_t RB)
{  return xperm(RA, RB, 2); }
uint_xlen_t xperm_b (uint_xlen_t RA, uint_xlen_t RB)
{  return xperm(RA, RB, 3); }
uint_xlen_t xperm_h (uint_xlen_t RA, uint_xlen_t RB)
{  return xperm(RA, RB, 4); }
uint_xlen_t xperm_w (uint_xlen_t RA, uint_xlen_t RB)
{  return xperm(RA, RB, 5); }
```

# gorc

based on RV bitmanip

```
uint32_t gorc32(uint32_t RA, uint32_t RB)
{
    uint32_t x = RA;
    int shamt = RB & 31;
    if (shamt & 1)  x |= ((x & 0x55555555) << 1)  | ((x & 0xAAAAAAAA) >> 1);
    if (shamt & 2)  x |= ((x & 0x33333333) << 2)  | ((x & 0xCCCCCCCC) >> 2);
    if (shamt & 4)  x |= ((x & 0x0F0F0F0F) << 4)  | ((x & 0xF0F0F0F0) >> 4);
    if (shamt & 8)  x |= ((x & 0x00FF00FF) << 8)  | ((x & 0xFF00FF00) >> 8);
    if (shamt & 16) x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16);
    return x;
}

uint64_t gorc64(uint64_t RA, uint64_t RB)
{
    uint64_t x = RA;
    int shamt = RB & 63;
    if (shamt & 1)  x |= ((x & 0x5555555555555555LL) << 1)  |
                         ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
    if (shamt & 2)  x |= ((x & 0x3333333333333333LL) << 2)  |
                         ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
    if (shamt & 4)  x |= ((x & 0x0F0F0F0F0F0F0F0FLL) << 4)  |
                         ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
    if (shamt & 8)  x |= ((x & 0x00FF00FF00FF00FFLL) << 8)  |
                         ((x & 0xFF00FF00FF00FF00LL) >> 8);
    if (shamt & 16) x |= ((x & 0x0000FFFF0000FFFFLL) << 16) |
                         ((x & 0xFFFF0000FFFF0000LL) >> 16);
    if (shamt & 32) x |= ((x & 0x00000000FFFFFFFFLL) << 32) |
                         ((x & 0xFFFFFFFF00000000LL) >> 32);
    return x;
}
```
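
as an illustration (the helper name is not part of the spec), a shift amount
of 7 ORs together the bits of each byte, turning every nonzero byte into
0xFF, which is useful for byte-granular zero detection:

```
uint64_t orc_b(uint64_t x) { return gorc64(x, 7); }
```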

# cmix

based on RV bitmanip, covered by ternary bitops

```
uint_xlen_t cmix(uint_xlen_t RA, uint_xlen_t RB, uint_xlen_t RC) {
    return (RA & RB) | (RC & ~RB);
}
```

# carryless mul

based on RV bitmanip
see <https://en.wikipedia.org/wiki/CLMUL_instruction_set>

```
uint_xlen_t clmul(uint_xlen_t RA, uint_xlen_t RB)
{
    uint_xlen_t x = 0;
    for (int i = 0; i < XLEN; i++)
        if ((RB >> i) & 1)
            x ^= RA << i;
    return x;
}

uint_xlen_t clmulh(uint_xlen_t RA, uint_xlen_t RB)
{
    uint_xlen_t x = 0;
    for (int i = 1; i < XLEN; i++)
        if ((RB >> i) & 1)
            x ^= RA >> (XLEN-i);
    return x;
}

uint_xlen_t clmulr(uint_xlen_t RA, uint_xlen_t RB)
{
    uint_xlen_t x = 0;
    for (int i = 0; i < XLEN; i++)
        if ((RB >> i) & 1)
            x ^= RA >> (XLEN-i-1);
    return x;
}
```

# Galois Field

## Multiply

this requires 3 parameters and a "degree":

    RT = GFMUL(RA, RB, gfdegree, modulo=RC)

realistically, with the degree also needing to be an immediate, it should be
brought down to an overwrite version:

    RS = GFMUL(RS, RA, gfdegree, modulo=RB)

| 0.5|6.10|11.15|16.20|21.25| 26..30  |31|
| -- | -- | --- | --- | --- | ------- |--|
| NN | RS | RA  | RB  | deg | 00 011  |Rc|

where the SimpleV variant may override RS-as-src differently from RS-as-dest.

```
from functools import reduce

# constants used in the multGF2 function
mask1 = mask2 = polyred = None

def setGF2(degree, irPoly):
    """Define parameters of binary finite field GF(2^m)/g(x)
       - degree: extension degree of binary field
       - irPoly: coefficients of irreducible polynomial g(x)
    """
    def i2P(sInt):
        """Convert an integer into a polynomial"""
        return [(sInt >> i) & 1
                for i in reversed(range(sInt.bit_length()))]

    global mask1, mask2, polyred
    mask1 = mask2 = 1 << degree
    mask2 -= 1
    polyred = reduce(lambda x, y: (x << 1) + y, i2P(irPoly)[1:])

def multGF2(p1, p2):
    """Multiply two polynomials in GF(2^m)/g(x)"""
    p = 0
    while p2:
        if p2 & 1:
            p ^= p1
        p1 <<= 1
        if p1 & mask1:
            p1 ^= polyred
        p2 >>= 1
    return p & mask2

if __name__ == "__main__":

    # Define binary field GF(2^3)/x^3 + x + 1
    setGF2(3, 0b1011)

    # Evaluate the product (x^2 + x + 1)(x^2 + 1)
    print("{:02x}".format(multGF2(0b111, 0b101)))

    # Define binary field GF(2^8)/x^8 + x^4 + x^3 + x + 1
    # (used in the Advanced Encryption Standard-AES)
    setGF2(8, 0b100011011)

    # Evaluate the product (x^7)(x^7 + x + 1)
    print("{:02x}".format(multGF2(0b10000000, 0b10000011)))
```

## GF add

    RS = GFADD(RS, RA|0, gfdegree, modulo=RB)

| 0.5|6.10|11.15|16.20|21.25| 26..30  |31|
| -- | -- | --- | --- | --- | ------- |--|
| NN | RS | RA  | RB  | deg | 01 011  |Rc|
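
addition in GF(2^m) is carryless: simply XOR the polynomial coefficients, so
unlike multiply no reduction by the modulo polynomial is required.  a minimal
python sketch in the style of multGF2 above (addGF2 is an illustrative name;
mask2 comes from setGF2):

```
def addGF2(p1, p2):
    """Add two polynomials in GF(2^m): carryless, i.e. bitwise XOR"""
    return (p1 ^ p2) & mask2
```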

## gf invert

a python reference for computing the multiplicative inverse using the
extended Euclidean algorithm (the hardcoded 8 and the default modulo 0x1B
target GF(2^8), as used in AES):

```
def gf_degree(a):
    res = 0
    a >>= 1
    while a != 0:
        a >>= 1
        res += 1
    return res

def gf_invert(a, mod=0x1B):
    v = mod
    g1 = 1
    g2 = 0
    j = gf_degree(a) - 8

    while a != 1:
        if j < 0:
            a, v = v, a
            g1, g2 = g2, g1
            j = -j

        a ^= v << j
        g1 ^= g2 << j

        a %= 256  # Emulating 8-bit overflow
        g1 %= 256 # Emulating 8-bit overflow

        j = gf_degree(a) - gf_degree(v)

    return g1
```

# bitmatrix

```
uint64_t bmatflip(uint64_t RA)
{
    uint64_t x = RA;
    x = shfl64(x, 31);
    x = shfl64(x, 31);
    x = shfl64(x, 31);
    return x;
}

uint64_t bmatxor(uint64_t RA, uint64_t RB)
{
    // transpose of RB
    uint64_t RBt = bmatflip(RB);
    uint8_t u[8]; // rows of RA
    uint8_t v[8]; // cols of RB
    for (int i = 0; i < 8; i++) {
        u[i] = RA >> (i*8);
        v[i] = RBt >> (i*8);
    }
    uint64_t x = 0;
    for (int i = 0; i < 64; i++) {
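        // pcnt() = population count (number of set bits), as in RV bitmanip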
        if (pcnt(u[i / 8] & v[i % 8]) & 1)
            x |= 1LL << i;
    }
    return x;
}

uint64_t bmator(uint64_t RA, uint64_t RB)
{
    // transpose of RB
    uint64_t RBt = bmatflip(RB);
    uint8_t u[8]; // rows of RA
    uint8_t v[8]; // cols of RB
    for (int i = 0; i < 8; i++) {
        u[i] = RA >> (i*8);
        v[i] = RBt >> (i*8);
    }
    uint64_t x = 0;
    for (int i = 0; i < 64; i++) {
        if ((u[i / 8] & v[i % 8]) != 0)
            x |= 1LL << i;
    }
    return x;
}
```