2 * Copyright (C) 2020 Collabora Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
29 #include "util/half_float.h"
30 #include "bifrost/disassemble.h"
32 /* Instruction packing tests */
/* Runs a single instruction through the software simulator and through a
 * packed shader, then reports whether the two agree.
 *
 * Visible steps:
 *  1. Copy 16 bytes of `input` into a zeroed bit_state and step it once with
 *     bit_step().
 *  2. Build a bi_context holding one block and four single-bundle clauses:
 *     clause 0 issues `ldubo`, clause 1 carries `ins` (both the .fma and .add
 *     slot assignments appear below; the condition choosing between them is
 *     not visible in this view), clause 2 issues `ldva`, clause 3 issues `st`.
 *  3. Pack with bi_pack() and execute with bit_vertex().
 *  4. Dump the shader and its disassembly to stderr when debug is at least
 *     BIT_DEBUG_ALL, or when the run failed and debug is at least
 *     BIT_DEBUG_FAIL.
 *
 * dev:   device handed to bit_vertex().
 * fma:   forwarded to bit_step(); presumably also selects the FMA vs ADD slot
 *        of clause 1 -- TODO confirm against the full source.
 * debug: verbosity threshold for the dump described above.
 *
 * NOTE(review): the return type, the `ins` and `input` parameters, the
 * declaration of `st`, parts of the initializers and the return statement are
 * not visible in this view.
 * NOTE(review): ctx is allocated with rzalloc(NULL, ...) and no matching
 * release is visible here -- confirm it is freed after the run. */
35 bit_test_single(struct panfrost_device
*dev
,
38 bool fma
, enum bit_debug debug
)
40 /* First, simulate the instruction */
41 struct bit_state s
= { 0 };
42 memcpy(s
.r
, input
, 16);
43 bit_step(&s
, ins
, fma
);
45 /* Next, wrap it up and pack it */
47 bi_instruction ldubo
= {
48 .type
= BI_LOAD_UNIFORM
,
57 .dest
= BIR_INDEX_REGISTER
| 0,
58 .dest_type
= nir_type_uint32
,
62 bi_instruction ldva
= {
63 .type
= BI_LOAD_VAR_ADDRESS
,
65 .dest
= BIR_INDEX_REGISTER
| 32,
66 .dest_type
= nir_type_uint32
,
69 BIR_INDEX_REGISTER
| 61,
70 BIR_INDEX_REGISTER
| 62,
84 BIR_INDEX_REGISTER
| 0,
85 ldva
.dest
, ldva
.dest
+ 1, ldva
.dest
+ 2,
89 nir_type_uint32
, nir_type_uint32
, nir_type_uint32
,
/* Context and block are zero-allocated; the block and clauses are parented
 * to ctx, and the predecessor set is parented to the block. */
94 bi_context
*ctx
= rzalloc(NULL
, bi_context
);
95 ctx
->stage
= MESA_SHADER_VERTEX
;
97 bi_block
*blk
= rzalloc(ctx
, bi_block
);
98 blk
->scheduled
= true;
100 blk
->base
.predecessors
= _mesa_set_create(blk
,
102 _mesa_key_pointer_equal
);
104 list_inithead(&ctx
->blocks
);
105 list_addtail(&blk
->base
.link
, &ctx
->blocks
);
106 list_inithead(&blk
->clauses
);
108 bi_clause
*clauses
[4] = {
109 rzalloc(ctx
, bi_clause
),
110 rzalloc(ctx
, bi_clause
),
111 rzalloc(ctx
, bi_clause
),
112 rzalloc(ctx
, bi_clause
)
/* Clauses alternate between scoreboard slots 0 and 1; the dependency mask is
 * the bit of the opposite slot. */
115 for (unsigned i
= 0; i
< 4; ++i
) {
116 clauses
[i
]->bundle_count
= 1;
117 list_addtail(&clauses
[i
]->link
, &blk
->clauses
);
118 clauses
[i
]->scoreboard_id
= (i
& 1);
121 clauses
[i
]->dependencies
= 1 << (~i
& 1);
122 clauses
[i
]->data_register_write_barrier
= true;
126 clauses
[0]->bundles
[0].add
= &ldubo
;
127 clauses
[0]->clause_type
= BIFROST_CLAUSE_UBO
;
130 clauses
[1]->bundles
[0].fma
= ins
;
132 clauses
[1]->bundles
[0].add
= ins
;
134 clauses
[0]->constant_count
= 1;
135 clauses
[1]->constant_count
= 1;
136 clauses
[1]->constants
[0] = ins
->constant
.u64
;
138 clauses
[2]->bundles
[0].add
= &ldva
;
139 clauses
[3]->bundles
[0].add
= &st
;
141 clauses
[2]->clause_type
= BIFROST_CLAUSE_UBO
;
142 clauses
[3]->clause_type
= BIFROST_CLAUSE_SSBO_STORE
;
/* Pack the program and run it; the remaining bit_vertex() arguments are not
 * visible in this view. */
144 panfrost_program prog
;
145 bi_pack(ctx
, &prog
.compiled
);
147 bool succ
= bit_vertex(dev
, prog
, input
, 16, NULL
, 0,
150 if (debug
>= BIT_DEBUG_ALL
|| (!succ
&& debug
>= BIT_DEBUG_FAIL
)) {
151 bi_print_shader(ctx
, stderr
);
152 disassemble_bifrost(stderr
, prog
.compiled
.data
, prog
.compiled
.size
, true);
158 /* Utilities for generating tests */
/* Fills mem[0..3] with pseudo-random floats. Each value is the low 8 bits of
 * rand() minus 127 (an integer in [-127, 128]) divided by 16. */
static void
bit_generate_float4(float *mem)
{
        float *end = mem + 4;

        while (mem != end) {
                int raw = rand() & 255;

                *mem = (float) (raw - 127) / 16.0;
                ++mem;
        }
}
/* Fills mem[0..7] with pseudo-random half-precision values. Each value is
 * the low 8 bits of rand() minus 127, divided by 16, then converted with
 * _mesa_float_to_half(). */
static void
bit_generate_half8(uint16_t *mem)
{
        unsigned remaining = 8;

        while (remaining--) {
                float raw = (float) (rand() & 255);

                *mem++ = _mesa_float_to_half((raw - 127) / 16.0);
        }
}
/* Builds a template bi_instruction for the tests.
 *
 * The visible code sets the destination to register 0 and, for each of the
 * first `argc` sources, points source i at register i with type
 * T = (base | size).
 *
 * C:    instruction class (its use is not visible in this view).
 * argc: number of sources to initialize.
 * base: base NIR ALU type, OR'd with `size` to form the source type.
 * size: bit size OR'd into the type.
 *
 * NOTE(review): other initializer fields and the return statement are not
 * visible in this view. */
174 static bi_instruction
175 bit_ins(enum bi_class C
, unsigned argc
, nir_alu_type base
, unsigned size
)
177 nir_alu_type T
= base
| size
;
179 bi_instruction ins
= {
181 .dest
= BIR_INDEX_REGISTER
| 0,
185 for (unsigned i
= 0; i
< argc
; ++i
) {
186 ins
.src
[i
] = BIR_INDEX_REGISTER
| i
;
187 ins
.src_types
[i
] = T
;
/* Iterates `swz` over every packed swizzle index for `args` sources: 2 bits
 * per source (4^args values) when sz is 16, and a single iteration with
 * swz == 0 for any other size. `args` and `sz` are parenthesized so that
 * expression arguments such as `n + 1` expand correctly. */
#define BIT_FOREACH_SWIZZLE(swz, args, sz) \
        for (unsigned swz = 0; swz < (((sz) == 16) ? (1u << (2 * (args))) : 1u); ++swz)
197 bit_apply_swizzle(bi_instruction
*ins
, unsigned swz
, unsigned args
, unsigned sz
)
199 unsigned slots_per_arg
= (sz
== 16) ? 4 : 1;
200 unsigned slots_per_chan
= (sz
== 16) ? 1 : 0;
201 unsigned mask
= (sz
== 16) ? 1 : 0;
203 for (unsigned i
= 0; i
< args
; ++i
) {
204 for (unsigned j
= 0; j
< (32 / sz
); ++j
) {
205 ins
->swizzle
[i
][j
] = ((swz
>> (slots_per_arg
* i
)) >> (slots_per_chan
* j
)) & mask
;
210 /* Tests all 64 combinations of floating point modifiers for a given
211 * instruction / floating-type / test type */
/* Exercises a two-source float instruction of class `c` under every
 * combination of output modifier, swizzle and input modifier.
 *
 * Output modifiers 0..3 are iterated only when `fma` is set or the size is
 * not 16; otherwise only outmod 0 runs. The four low bits of `inmod` drive
 * src_abs[0], src_abs[1], src_neg[0] and src_neg[1] respectively. Each
 * combination is run through bit_test_single() and a "FAIL: fmod..." line is
 * printed to stderr when it reports failure.
 *
 * NOTE(review): how `outmod` and `op` are applied to `ins`, and the remaining
 * fprintf arguments, are not visible in this view. */
214 bit_fmod_helper(struct panfrost_device
*dev
,
215 enum bi_class c
, unsigned size
, bool fma
,
216 uint32_t *input
, enum bit_debug debug
, unsigned op
)
218 bi_instruction ins
= bit_ins(c
, 2, nir_type_float
, size
);
220 bool fp16
= (size
== 16);
221 bool has_outmods
= fma
|| !fp16
;
223 for (unsigned outmod
= 0; outmod
< (has_outmods
? 4 : 1); ++outmod
) {
224 BIT_FOREACH_SWIZZLE(swz
, 2, size
) {
225 for (unsigned inmod
= 0; inmod
< 16; ++inmod
) {
228 ins
.src_abs
[0] = (inmod
& 0x1);
229 ins
.src_abs
[1] = (inmod
& 0x2);
230 ins
.src_neg
[0] = (inmod
& 0x4);
231 ins
.src_neg
[1] = (inmod
& 0x8);
232 bit_apply_swizzle(&ins
, swz
, 2, size
);
234 if (!bit_test_single(dev
, &ins
, input
, fma
, debug
)) {
235 fprintf(stderr
, "FAIL: fmod.%s%u.%s%s.%u\n",
239 outmod
? bi_output_mod_name(outmod
) : ".none",
/* Exercises a three-source float BI_FMA of the given size on the FMA unit.
 *
 * Iterates output modifiers 0..3 and the 8 combinations of negating each of
 * the three sources (one bit of `inmod` per source). Failures are reported
 * on stderr with a "FAIL: fma..." line.
 *
 * NOTE(review): how `outmod` is applied to `ins` and the remaining fprintf
 * arguments are not visible in this view. */
248 bit_fma_helper(struct panfrost_device
*dev
,
249 unsigned size
, uint32_t *input
, enum bit_debug debug
)
251 bi_instruction ins
= bit_ins(BI_FMA
, 3, nir_type_float
, size
);
253 for (unsigned outmod
= 0; outmod
< 4; ++outmod
) {
254 for (unsigned inmod
= 0; inmod
< 8; ++inmod
) {
256 ins
.src_neg
[0] = (inmod
& 0x1);
257 ins
.src_neg
[1] = (inmod
& 0x2);
258 ins
.src_neg
[2] = (inmod
& 0x4);
260 if (!bit_test_single(dev
, &ins
, input
, true, debug
)) {
261 fprintf(stderr
, "FAIL: fma%u%s.%u\n",
263 outmod
? bi_output_mod_name(outmod
) : ".none",
/* Exercises the 32-bit BI_FMA with the mscale flag set.
 *
 * The template has four sources; source 3 is retyped to int32 and source 2
 * is redirected to the same index as source 3 (see the "Not enough ports!"
 * remark). Iterates output modifiers 0..3 and 8 input-modifier combinations:
 * bit 0 -> src_abs[0], bit 1 -> src_neg[1], bit 2 -> src_neg[2].
 *
 * NOTE(review): how `outmod` is applied to `ins` and the remaining fprintf
 * arguments are not visible in this view. */
271 bit_fma_mscale_helper(struct panfrost_device
*dev
, uint32_t *input
, enum bit_debug debug
)
273 bi_instruction ins
= bit_ins(BI_FMA
, 4, nir_type_float
, 32);
274 ins
.op
.mscale
= true;
275 ins
.src_types
[3] = nir_type_int32
;
276 ins
.src
[2] = ins
.src
[3]; /* Not enough ports! */
278 for (unsigned outmod
= 0; outmod
< 4; ++outmod
) {
279 for (unsigned inmod
= 0; inmod
< 8; ++inmod
) {
281 ins
.src_abs
[0] = (inmod
& 0x1);
282 ins
.src_neg
[1] = (inmod
& 0x2);
283 ins
.src_neg
[2] = (inmod
& 0x4);
285 if (!bit_test_single(dev
, &ins
, input
, true, debug
)) {
286 fprintf(stderr
, "FAIL: fma_mscale%s.%u\n",
287 outmod
? bi_output_mod_name(outmod
) : ".none",
/* Exercises BI_CSEL (four unsigned sources of the given size) on the FMA
 * unit for every condition from BI_COND_LT through BI_COND_NE.
 *
 * Source 2 is aliased to source 0 so that at most three distinct registers
 * are read. Failures are reported on stderr as "FAIL: csel<size>.<cond>".
 *
 * NOTE(review): the statement that stores `cond` into `ins` is not visible
 * in this view. */
295 bit_csel_helper(struct panfrost_device
*dev
,
296 unsigned size
, uint32_t *input
, enum bit_debug debug
)
298 bi_instruction ins
= bit_ins(BI_CSEL
, 4, nir_type_uint
, size
);
300 /* SCHEDULER: We can only read 3 registers at once. */
301 ins
.src
[2] = ins
.src
[0];
303 for (enum bi_cond cond
= BI_COND_LT
; cond
<= BI_COND_NE
; ++cond
) {
306 if (!bit_test_single(dev
, &ins
, input
, true, debug
)) {
307 fprintf(stderr
, "FAIL: csel%u.%s\n",
308 size
, bi_cond_name(cond
));
/* Exercises every BI_SPECIAL op from BI_SPECIAL_FRCP through
 * BI_SPECIAL_EXP2_LOW at the given size.
 *
 * For BI_SPECIAL_EXP2_LOW a dedicated input is built: word 1 is the original
 * first input word, and word 0 is the first input reinterpreted as a float,
 * multiplied by 2^24 and truncated to an integer. For 16-bit sizes both
 * values (0 and 1) of swizzle[0][0] are tried; otherwise only 0.
 * Failures are reported on stderr as "FAIL: special<size>.<op>".
 *
 * NOTE(review): only exp_input[0] and exp_input[1] are written in the
 * visible code, while bit_test_single() copies 16 bytes of its input --
 * confirm the upper two words are initialized or harmless.
 * NOTE(review): `input` is read through a float pointer; confirm this
 * aliasing is acceptable for the project's build flags.
 * NOTE(review): the fp16 skip for exp2, the store of `op` into `ins` and the
 * trailing bit_test_single() arguments are not visible in this view. */
314 bit_special_helper(struct panfrost_device
*dev
,
315 unsigned size
, uint32_t *input
, enum bit_debug debug
)
317 bi_instruction ins
= bit_ins(BI_SPECIAL
, 2, nir_type_float
, size
);
318 uint32_t exp_input
[4];
320 for (enum bi_special_op op
= BI_SPECIAL_FRCP
; op
<= BI_SPECIAL_EXP2_LOW
; ++op
) {
321 if (op
== BI_SPECIAL_EXP2_LOW
) {
322 /* exp2 only supported in fp32 mode */
326 /* Give expected input */
327 exp_input
[1] = input
[0];
328 float *ff
= (float *) input
;
329 exp_input
[0] = (int) (ff
[0] * (1 << 24));
332 for (unsigned c
= 0; c
< ((size
== 16) ? 2 : 1); ++c
) {
334 ins
.swizzle
[0][0] = c
;
336 if (!bit_test_single(dev
, &ins
,
337 op
== BI_SPECIAL_EXP2_LOW
? exp_input
: input
,
339 fprintf(stderr
, "FAIL: special%u.%s\n",
340 size
, bi_special_op_name(op
));
/* Exercises BI_TABLE (one 32-bit float source) for every op from 0 through
 * BI_TABLE_LOG2_U_OVER_U_1_LOW, with fma = false passed to
 * bit_test_single(). Failures are reported on stderr as "FAIL: table.<op>".
 *
 * NOTE(review): the statement that stores `op` into `ins` is not visible in
 * this view. */
347 bit_table_helper(struct panfrost_device
*dev
, uint32_t *input
, enum bit_debug debug
)
349 bi_instruction ins
= bit_ins(BI_TABLE
, 1, nir_type_float
, 32);
351 for (enum bi_table_op op
= 0; op
<= BI_TABLE_LOG2_U_OVER_U_1_LOW
; ++op
) {
354 if (!bit_test_single(dev
, &ins
, input
, false, debug
)) {
355 fprintf(stderr
, "FAIL: table.%s\n",
356 bi_table_op_name(op
));
/* Exercises BI_FREXP (one 32-bit float source, int32 destination) for every
 * op from 0 through BI_FREXPE_LOG, with fma = true passed to
 * bit_test_single(). Failures are reported on stderr as "FAIL: frexp.<op>".
 *
 * NOTE(review): the statement that stores `op` into `ins` is not visible in
 * this view. */
362 bit_frexp_helper(struct panfrost_device
*dev
, uint32_t *input
, enum bit_debug debug
)
364 bi_instruction ins
= bit_ins(BI_FREXP
, 1, nir_type_float
, 32);
365 ins
.dest_type
= nir_type_int32
;
367 for (enum bi_frexp_op op
= 0; op
<= BI_FREXPE_LOG
; ++op
) {
370 if (!bit_test_single(dev
, &ins
, input
, true, debug
)) {
371 fprintf(stderr
, "FAIL: frexp.%s\n",
372 bi_frexp_op_name(op
));
/* Exercises BI_ROUND (one float source of size `sz`) for round modes 0..3.
 *
 * For 16-bit sizes all four swizzle combinations are tried: bit i of
 * `swizzle` becomes swizzle[0][i] for i in {0, 1}. Other sizes run a single
 * combination with both entries 0. `FMA` is forwarded to bit_test_single().
 * Failures are reported on stderr with a "FAIL: round..." line.
 *
 * NOTE(review): the fprintf arguments are not visible in this view. */
378 bit_round_helper(struct panfrost_device
*dev
, uint32_t *input
, unsigned sz
, bool FMA
, enum bit_debug debug
)
380 bi_instruction ins
= bit_ins(BI_ROUND
, 1, nir_type_float
, sz
);
382 for (enum bifrost_roundmode mode
= 0; mode
<= 3; ++mode
) {
383 for (unsigned swizzle
= 0; swizzle
< (sz
== 16 ? 4 : 1); ++swizzle
) {
385 for (unsigned i
= 0; i
< 2; ++i
)
386 ins
.swizzle
[0][i
] = ((swizzle
>> i
) & 1) ? 1 : 0;
389 ins
.roundmode
= mode
;
391 if (!bit_test_single(dev
, &ins
, input
, FMA
, debug
)) {
392 fprintf(stderr
, "FAIL: round.%u.%u\n",
/* Exercises BI_REDUCE_FMA (two 32-bit float sources) for every op from 0
 * through BI_REDUCE_ADD_FREXPM, with fma = true passed to bit_test_single().
 * Failures are reported on stderr as "FAIL: reduce.<op>".
 *
 * NOTE(review): the statement that stores `op` into `ins` is not visible in
 * this view. */
400 bit_reduce_helper(struct panfrost_device
*dev
, uint32_t *input
, enum bit_debug debug
)
402 bi_instruction ins
= bit_ins(BI_REDUCE_FMA
, 2, nir_type_float
, 32);
404 for (enum bi_reduce_op op
= 0; op
<= BI_REDUCE_ADD_FREXPM
; ++op
) {
407 if (!bit_test_single(dev
, &ins
, input
, true, debug
)) {
408 fprintf(stderr
, "FAIL: reduce.%s\n",
409 bi_reduce_op_name(op
));
/* Exercises BI_SELECT with C = 32 / size sources, each typed as an unsigned
 * integer of `size` bits.
 *
 * For every one of the 2^C swizzle patterns, bit i of `swizzle` chooses
 * between component 0 and component `hi` for source i, where `hi` is 1 for
 * 16-bit and 2 otherwise. In 16-bit mode the test runs twice, once with
 * fma = true and once with fma = false (`!add`); otherwise only fma = true.
 * Failures are reported on stderr with a "FAIL: select..." line.
 *
 * NOTE(review): the condition guarding the src[2] = src[0] aliasing and the
 * fprintf arguments are not visible in this view. */
415 bit_select_helper(struct panfrost_device
*dev
, uint32_t *input
, unsigned size
, enum bit_debug debug
)
417 unsigned C
= 32 / size
;
418 bi_instruction ins
= bit_ins(BI_SELECT
, C
, nir_type_uint
, 32);
420 for (unsigned c
= 0; c
< C
; ++c
)
421 ins
.src_types
[c
] = nir_type_uint
| size
;
424 /* SCHEDULER: We can only read 3 registers at once. */
425 ins
.src
[2] = ins
.src
[0];
428 /* Each argument has swizzle {lo, hi} so 2^C options */
429 unsigned hi
= (size
== 16) ? 1 : 2;
431 for (unsigned add
= 0; add
< ((size
== 16) ? 2 : 1); ++add
) {
432 for (unsigned swizzle
= 0; swizzle
< (1 << C
); ++swizzle
) {
433 for (unsigned i
= 0; i
< C
; ++i
)
434 ins
.swizzle
[i
][0] = ((swizzle
>> i
) & 1) ? hi
: 0;
436 if (!bit_test_single(dev
, &ins
, input
, !add
, debug
)) {
437 fprintf(stderr
, "FAIL: select.%u.%u\n",
/* Exercises float BI_CMP (two sources of `size` bits, unsigned destination
 * of the same size) for every condition from BI_COND_LT through BI_COND_NE
 * and every modifier combination.
 *
 * `mods` ranges over 64 values for 16-bit, 16 for 32-bit and 1 otherwise:
 *  - 16-bit: bits 0-1 and 2-3 select the two swizzle entries of sources 0
 *    and 1; bits 4 and 5 set src_abs[0] and src_abs[1].
 *  - 8-bit: the identity swizzle {0, 1, 2, 3} is applied to both sources.
 *  - 32-bit: bits 0-1 set src_abs[0..1], bits 2-3 set src_neg[0..1].
 * `FMA` is forwarded to bit_test_single(). Failures are reported on stderr
 * as "FAIL: cmp.<fma|add>.<size>.<mods>.<cond>".
 *
 * NOTE(review): the store of `cond` into `ins` and the opening of the
 * size == 16 branch are not visible in this view. */
445 bit_fcmp_helper(struct panfrost_device
*dev
, uint32_t *input
, unsigned size
, enum bit_debug debug
, bool FMA
)
447 bi_instruction ins
= bit_ins(BI_CMP
, 2, nir_type_float
, size
);
448 ins
.dest_type
= nir_type_uint
| size
;
450 /* 16-bit has swizzles and abs. 32-bit has abs/neg mods. */
451 unsigned max_mods
= (size
== 16) ? 64 : (size
== 32) ? 16 : 1;
453 for (enum bi_cond cond
= BI_COND_LT
; cond
<= BI_COND_NE
; ++cond
) {
454 for (unsigned mods
= 0; mods
< max_mods
; ++mods
) {
458 for (unsigned i
= 0; i
< 2; ++i
) {
459 ins
.swizzle
[i
][0] = ((mods
>> (i
* 2)) & 1) ? 1 : 0;
460 ins
.swizzle
[i
][1] = ((mods
>> (i
* 2)) & 2) ? 1 : 0;
463 ins
.src_abs
[0] = (mods
& 16) ? true : false;
464 ins
.src_abs
[1] = (mods
& 32) ? true : false;
465 } else if (size
== 8) {
466 for (unsigned i
= 0; i
< 2; ++i
) {
467 for (unsigned j
= 0; j
< 4; ++j
)
468 ins
.swizzle
[i
][j
] = j
;
470 } else if (size
== 32) {
471 ins
.src_abs
[0] = (mods
& 1) ? true : false;
472 ins
.src_abs
[1] = (mods
& 2) ? true : false;
473 ins
.src_neg
[0] = (mods
& 4) ? true : false;
474 ins
.src_neg
[1] = (mods
& 8) ? true : false;
477 if (!bit_test_single(dev
, &ins
, input
, FMA
, debug
)) {
478 fprintf(stderr
, "FAIL: cmp.%s.%u.%u.%u\n",
479 FMA
? "fma" : "add", size
, mods
, cond
);
/* Exercises integer BI_CMP (two sources of base type `T` and `size` bits,
 * unsigned destination of the same size) for every condition from
 * BI_COND_LT through BI_COND_NE and every swizzle produced by
 * BIT_FOREACH_SWIZZLE, with fma = false passed to bit_test_single().
 * Failures are reported on stderr with a "FAIL: icmp..." line.
 *
 * NOTE(review): the store of `cond` into `ins` and the fprintf arguments are
 * not visible in this view. */
486 bit_icmp_helper(struct panfrost_device
*dev
, uint32_t *input
, unsigned size
, nir_alu_type T
, enum bit_debug debug
)
488 bi_instruction ins
= bit_ins(BI_CMP
, 2, T
, size
);
489 ins
.dest_type
= nir_type_uint
| size
;
491 for (enum bi_cond cond
= BI_COND_LT
; cond
<= BI_COND_NE
; ++cond
) {
492 BIT_FOREACH_SWIZZLE(swz
, 2, size
) {
494 bit_apply_swizzle(&ins
, swz
, 2, size
);
496 if (!bit_test_single(dev
, &ins
, input
, false, debug
)) {
497 fprintf(stderr
, "FAIL: icmp.%u.%u.%u\n",
/* Exercises conversions between float, unsigned and signed types for one
 * (from_size, to_size) pair.
 *
 * Every (from_base, to_base) pair drawn from {float, uint, int} is
 * considered, and each of the guard conditions below marks a combination
 * described by its comment as invalid or unnecessary. For a tested pair the
 * destination type, source type, round mode and the two swizzle entries
 * (cx, cy) of source 0 are set before calling bit_test_single() with `FMA`.
 * Failures are reported on stderr with a "FAIL: convert..." line.
 *
 * NOTE(review): the bodies of the guard conditions (presumably `continue`),
 * part of the initializer and the remaining fprintf arguments are not
 * visible in this view. */
507 bit_convert_helper(struct panfrost_device
*dev
, unsigned from_size
,
508 unsigned to_size
, unsigned cx
, unsigned cy
, bool FMA
,
509 enum bifrost_roundmode roundmode
,
510 uint32_t *input
, enum bit_debug debug
)
512 bi_instruction ins
= {
514 .dest
= BIR_INDEX_REGISTER
| 0,
515 .src
= { BIR_INDEX_REGISTER
| 0 }
518 nir_alu_type Ts
[3] = { nir_type_float
, nir_type_uint
, nir_type_int
};
520 for (unsigned from_base
= 0; from_base
< 3; ++from_base
) {
521 for (unsigned to_base
= 0; to_base
< 3; ++to_base
) {
522 /* Discard invalid combinations.. */
523 if ((from_size
== to_size
) && (from_base
== to_base
))
526 /* Can't switch signedness */
527 if (from_base
&& to_base
)
530 /* No F16_TO_I32, etc */
531 if (from_size
!= to_size
&& from_base
== 0 && to_base
)
534 if (from_size
!= to_size
&& from_base
&& to_base
== 0)
537 /* No need, just ignore the upper half */
538 if (from_size
> to_size
&& from_base
== to_base
&& from_base
)
541 ins
.dest_type
= Ts
[to_base
] | to_size
;
542 ins
.src_types
[0] = Ts
[from_base
] | from_size
;
543 ins
.roundmode
= roundmode
;
544 ins
.swizzle
[0][0] = cx
;
545 ins
.swizzle
[0][1] = cy
;
547 if (!bit_test_single(dev
, &ins
, input
, FMA
, debug
)) {
548 fprintf(stderr
, "FAIL: convert.%u-%u.%u-%u.%u%u\n",
549 from_base
, from_size
,
/* Exercises instructions whose sources come from the clause constant.
 *
 * For count = 1, 2, 3 the instruction class is BI_MOV, BI_ADD and BI_FMA
 * respectively (32-bit float). Source 0 is BIR_INDEX_CONSTANT | 0; when a
 * second source exists it is BIR_INDEX_CONSTANT | 32 in the `doubled` pass
 * and BIR_INDEX_CONSTANT | 0 otherwise; when a third source exists it is
 * BIR_INDEX_ZERO. In the `doubled` pass the 64-bit constant holds
 * 0x3f800000 in its low word and 0x3f000000 in its high word. Each case is
 * run with fma = true and failures are reported on stderr as
 * "FAIL: constants.<two|one>...".
 *
 * NOTE(review): the constant used in the non-doubled pass and the remaining
 * fprintf arguments are not visible in this view. */
558 bit_constant_helper(struct panfrost_device
*dev
,
559 uint32_t *input
, enum bit_debug debug
)
561 enum bi_class C
[3] = { BI_MOV
, BI_ADD
, BI_FMA
};
563 for (unsigned doubled
= 0; doubled
< 2; ++doubled
) {
564 for (unsigned count
= 1; count
<= 3; ++count
) {
565 bi_instruction ins
= bit_ins(C
[count
- 1], count
, nir_type_float
, 32);
567 ins
.src
[0] = BIR_INDEX_CONSTANT
| 0;
568 ins
.src
[1] = (count
>= 2) ? BIR_INDEX_CONSTANT
| (doubled
? 32 : 0) : 0;
569 ins
.src
[2] = (count
>= 3) ? BIR_INDEX_ZERO
: 0;
571 ins
.constant
.u64
= doubled
?
572 0x3f800000ull
| (0x3f000000ull
<< 32ull) :
575 if (!bit_test_single(dev
, &ins
, input
, true, debug
)) {
576 fprintf(stderr
, "FAIL: constants.%s.%u\n",
577 doubled
? "two" : "one",
/* Exercises BI_BITWISE (unsigned, `size` bits) for every op from
 * BI_BITWISE_AND through BI_BITWISE_XOR.
 *
 * Source 2 is BIR_INDEX_ZERO and sources 0 and 1 get the identity swizzle
 * over their (32 / size) channels. For each op the four combinations of
 * src_invert[0] (bit 0 of `mods`) and src_invert[1] (bit 1) are run with
 * fma = true. Failures are reported on stderr with a "FAIL: bitwise..."
 * line.
 *
 * NOTE(review): the store of `op` into `ins` and the fprintf arguments are
 * not visible in this view. */
585 bit_bitwise_helper(struct panfrost_device
*dev
, uint32_t *input
, unsigned size
, enum bit_debug debug
)
587 bi_instruction ins
= bit_ins(BI_BITWISE
, 3, nir_type_uint
, size
);
590 ins
.src
[2] = BIR_INDEX_ZERO
;
592 /* Force identity swizzle -- bitwise is not swizzleable */
593 for (unsigned i
= 0; i
< 2; ++i
) {
594 for (unsigned j
= 0; j
< (32 / size
); ++j
)
595 ins
.swizzle
[i
][j
] = j
;
598 for (unsigned op
= BI_BITWISE_AND
; op
<= BI_BITWISE_XOR
; ++op
) {
601 for (unsigned mods
= 0; mods
< 4; ++mods
) {
602 ins
.bitwise
.src_invert
[0] = mods
& 1;
603 ins
.bitwise
.src_invert
[1] = mods
& 2;
605 if (!bit_test_single(dev
, &ins
, input
, true, debug
)) {
606 fprintf(stderr
, "FAIL: bitwise.%u.%u.%u\n",
/* Entry point of the packing tests: generates inputs and runs every helper.
 *
 * Visible sequence:
 *  - fill input32 / input16 with bit_generate_float4() / bit_generate_half8();
 *  - constant tests;
 *  - for sz in {16, 32}: ADD on both units, rounding, MINMAX min/max, FMA,
 *    and unsigned/signed integer compares;
 *  - 32-bit CSEL;
 *  - table and special-function tests on fixed inputs (0.9 as float; and a
 *    word packing half(0.9) in the low 16 bits with half(0.2) in the high
 *    16 bits);
 *  - conversions for round modes 0..3 across 32->32, 32->16, 16->32, 16->16;
 *  - frexp, reduce, and FMA.mscale (fourth input word forced to 0x7);
 *  - select for 8 and 16 bits, float compares for 32 and 16 bits, bitwise
 *    for 8, 16 and 32 bits.
 *
 * NOTE(review): bit_round_helper() is given input32 even when sz == 16,
 * unlike the neighbouring calls that use the size-matched `input`; likewise
 * bit_fcmp_helper() at size 16 -- confirm this is intended.
 * NOTE(review): the declarations of input32, input16 and `input`, and the
 * return type, are not visible in this view. */
614 bit_packing(struct panfrost_device
*dev
, enum bit_debug debug
)
619 bit_generate_float4(input32
);
620 bit_generate_half8(input16
);
622 bit_constant_helper(dev
, (uint32_t *) input32
, debug
);
624 for (unsigned sz
= 16; sz
<= 32; sz
*= 2) {
626 (sz
== 16) ? (uint32_t *) input16
:
627 (uint32_t *) input32
;
629 bit_fmod_helper(dev
, BI_ADD
, sz
, true, input
, debug
, 0);
630 bit_fmod_helper(dev
, BI_ADD
, sz
, false, input
, debug
, 0);
631 bit_round_helper(dev
, (uint32_t *) input32
, sz
, true, debug
);
633 bit_fmod_helper(dev
, BI_MINMAX
, sz
, false, input
, debug
, BI_MINMAX_MIN
);
634 bit_fmod_helper(dev
, BI_MINMAX
, sz
, false, input
, debug
, BI_MINMAX_MAX
);
636 bit_fma_helper(dev
, sz
, input
, debug
);
637 bit_icmp_helper(dev
, input
, sz
, nir_type_uint
, debug
);
638 bit_icmp_helper(dev
, input
, sz
, nir_type_int
, debug
);
/* Single-iteration loop: only sz == 32 is exercised for CSEL. */
641 for (unsigned sz
= 32; sz
<= 32; sz
*= 2)
642 bit_csel_helper(dev
, sz
, (uint32_t *) input32
, debug
);
644 float special
[4] = { 0.9 };
645 uint32_t special16
[4] = { _mesa_float_to_half(special
[0]) | (_mesa_float_to_half(0.2) << 16) };
647 bit_table_helper(dev
, (uint32_t *) special
, debug
);
649 for (unsigned sz
= 16; sz
<= 32; sz
*= 2) {
651 (sz
== 16) ? special16
:
652 (uint32_t *) special
;
654 bit_special_helper(dev
, sz
, input
, debug
);
657 for (unsigned rm
= 0; rm
< 4; ++rm
) {
658 bit_convert_helper(dev
, 32, 32, 0, 0, false, rm
, (uint32_t *) input32
, debug
);
660 for (unsigned c
= 0; c
< 2; ++c
)
661 bit_convert_helper(dev
, 32, 16, c
, 0, false, rm
, (uint32_t *) input32
, debug
);
663 bit_convert_helper(dev
, 16, 32, 0, 0, false, rm
, (uint32_t *) input16
, debug
);
665 for (unsigned c
= 0; c
< 4; ++c
)
666 bit_convert_helper(dev
, 16, 16, c
& 1, c
>> 1, false, rm
, (uint32_t *) input16
, debug
);
669 bit_frexp_helper(dev
, (uint32_t *) input32
, debug
);
670 bit_reduce_helper(dev
, (uint32_t *) input32
, debug
);
672 uint32_t mscale_input
[4];
673 memcpy(mscale_input
, input32
, sizeof(input32
));
674 mscale_input
[3] = 0x7;
675 bit_fma_mscale_helper(dev
, mscale_input
, debug
);
677 for (unsigned sz
= 8; sz
<= 16; sz
*= 2) {
678 bit_select_helper(dev
, (uint32_t *) input32
, sz
, debug
);
681 bit_fcmp_helper(dev
, (uint32_t *) input32
, 32, debug
, true);
682 bit_fcmp_helper(dev
, (uint32_t *) input32
, 16, debug
, true);
684 for (unsigned sz
= 8; sz
<= 32; sz
*= 2)
685 bit_bitwise_helper(dev
, (uint32_t *) input32
, sz
, debug
);