pan/bi: Eliminate writemasks in the IR
[mesa.git] / src / panfrost / bifrost / test / bi_test_pack.c
1 /*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include "bit.h"
28 #include "bi_print.h"
29 #include "util/half_float.h"
30 #include "bifrost/disassemble.h"
31
32 /* Instruction packing tests */
33
34 static bool
35 bit_test_single(struct panfrost_device *dev,
36 bi_instruction *ins,
37 uint32_t input[4],
38 bool fma, enum bit_debug debug)
39 {
40 /* First, simulate the instruction */
41 struct bit_state s = { 0 };
42 memcpy(s.r, input, 16);
43 bit_step(&s, ins, fma);
44
45 /* Next, wrap it up and pack it */
46
47 bi_instruction ldubo = {
48 .type = BI_LOAD_UNIFORM,
49 .src = {
50 BIR_INDEX_CONSTANT,
51 BIR_INDEX_ZERO
52 },
53 .src_types = {
54 nir_type_uint32,
55 nir_type_uint32,
56 },
57 .dest = BIR_INDEX_REGISTER | 0,
58 .dest_type = nir_type_uint32,
59 .vector_channels = 4,
60 };
61
62 bi_instruction ldva = {
63 .type = BI_LOAD_VAR_ADDRESS,
64 .vector_channels = 3,
65 .dest = BIR_INDEX_REGISTER | 32,
66 .dest_type = nir_type_uint32,
67 .src = {
68 BIR_INDEX_CONSTANT,
69 BIR_INDEX_REGISTER | 61,
70 BIR_INDEX_REGISTER | 62,
71 0,
72 },
73 .src_types = {
74 nir_type_uint32,
75 nir_type_uint32,
76 nir_type_uint32,
77 nir_type_uint32,
78 }
79 };
80
81 bi_instruction st = {
82 .type = BI_STORE_VAR,
83 .src = {
84 BIR_INDEX_REGISTER | 0,
85 ldva.dest, ldva.dest + 1, ldva.dest + 2,
86 },
87 .src_types = {
88 nir_type_uint32,
89 nir_type_uint32, nir_type_uint32, nir_type_uint32,
90 },
91 .vector_channels = 4
92 };
93
94 bi_context *ctx = rzalloc(NULL, bi_context);
95 ctx->stage = MESA_SHADER_VERTEX;
96
97 bi_block *blk = rzalloc(ctx, bi_block);
98 blk->scheduled = true;
99
100 blk->base.predecessors = _mesa_set_create(blk,
101 _mesa_hash_pointer,
102 _mesa_key_pointer_equal);
103
104 list_inithead(&ctx->blocks);
105 list_addtail(&blk->base.link, &ctx->blocks);
106 list_inithead(&blk->clauses);
107
108 bi_clause *clauses[4] = {
109 rzalloc(ctx, bi_clause),
110 rzalloc(ctx, bi_clause),
111 rzalloc(ctx, bi_clause),
112 rzalloc(ctx, bi_clause)
113 };
114
115 for (unsigned i = 0; i < 4; ++i) {
116 clauses[i]->bundle_count = 1;
117 list_addtail(&clauses[i]->link, &blk->clauses);
118 clauses[i]->scoreboard_id = (i & 1);
119
120 if (i) {
121 clauses[i]->dependencies = 1 << (~i & 1);
122 clauses[i]->data_register_write_barrier = true;
123 }
124 }
125
126 clauses[0]->bundles[0].add = &ldubo;
127 clauses[0]->clause_type = BIFROST_CLAUSE_UBO;
128
129 if (fma)
130 clauses[1]->bundles[0].fma = ins;
131 else
132 clauses[1]->bundles[0].add = ins;
133
134 clauses[0]->constant_count = 1;
135 clauses[1]->constant_count = 1;
136 clauses[1]->constants[0] = ins->constant.u64;
137
138 clauses[2]->bundles[0].add = &ldva;
139 clauses[3]->bundles[0].add = &st;
140
141 clauses[2]->clause_type = BIFROST_CLAUSE_UBO;
142 clauses[3]->clause_type = BIFROST_CLAUSE_SSBO_STORE;
143
144 panfrost_program prog;
145 bi_pack(ctx, &prog.compiled);
146
147 bool succ = bit_vertex(dev, prog, input, 16, NULL, 0,
148 s.r, 16, debug);
149
150 if (debug >= BIT_DEBUG_ALL || (!succ && debug >= BIT_DEBUG_FAIL)) {
151 bi_print_shader(ctx, stderr);
152 disassemble_bifrost(stderr, prog.compiled.data, prog.compiled.size, true);
153 }
154
155 return succ;
156 }
157
158 /* Utilities for generating tests */
159
/* Fills mem[0..3] with pseudo-random floats. Each value takes the low 8 bits
 * of rand(), subtracts 127 and divides by 16, so every result is a multiple
 * of 1/16 in the range [-127/16, 128/16]. */
static void
bit_generate_float4(float *mem)
{
        float *const end = mem + 4;

        for (float *out = mem; out != end; ++out) {
                int centred = (rand() & 255) - 127;

                *out = (float) centred / 16.0;
        }
}
166
/* Fills mem[0..7] with pseudo-random half floats. Each value is built like
 * the 32-bit generator's (low 8 bits of rand(), minus 127, over 16) and then
 * converted with _mesa_float_to_half(). */
static void
bit_generate_half8(uint16_t *mem)
{
        for (unsigned lane = 0; lane < 8; ++lane) {
                float centred = (float) (rand() & 255) - 127;

                mem[lane] = _mesa_float_to_half(centred / 16.0);
        }
}
173
174 static bi_instruction
175 bit_ins(enum bi_class C, unsigned argc, nir_alu_type base, unsigned size)
176 {
177 nir_alu_type T = base | size;
178
179 bi_instruction ins = {
180 .type = C,
181 .dest = BIR_INDEX_REGISTER | 0,
182 .dest_type = T,
183 };
184
185 for (unsigned i = 0; i < argc; ++i) {
186 ins.src[i] = BIR_INDEX_REGISTER | i;
187 ins.src_types[i] = T;
188 }
189
190 return ins;
191 }
192
193 /* Tests all 64 combinations of floating point modifiers for a given
194 * instruction / floating-type / test type */
195
196 static void
197 bit_fmod_helper(struct panfrost_device *dev,
198 enum bi_class c, unsigned size, bool fma,
199 uint32_t *input, enum bit_debug debug, unsigned op)
200 {
201 bi_instruction ins = bit_ins(c, 2, nir_type_float, size);
202
203 bool fp16 = (size == 16);
204 bool has_outmods = fma || !fp16;
205
206 for (unsigned outmod = 0; outmod < (has_outmods ? 4 : 1); ++outmod) {
207 for (unsigned inmod = 0; inmod < 16; ++inmod) {
208 ins.outmod = outmod;
209 ins.op.minmax = op;
210 ins.src_abs[0] = (inmod & 0x1);
211 ins.src_abs[1] = (inmod & 0x2);
212 ins.src_neg[0] = (inmod & 0x4);
213 ins.src_neg[1] = (inmod & 0x8);
214
215 /* Skip over tests that cannot run */
216 if ((fma || c == BI_MINMAX) && fp16 && ins.src_abs[0] && ins.src_abs[1])
217 continue;
218
219 if (!bit_test_single(dev, &ins, input, fma, debug)) {
220 fprintf(stderr, "FAIL: fmod.%s%u.%s%s.%u\n",
221 bi_class_name(c),
222 size,
223 fma ? "fma" : "add",
224 outmod ? bi_output_mod_name(outmod) : ".none",
225 inmod);
226 }
227 }
228 }
229 }
230
231 static void
232 bit_fma_helper(struct panfrost_device *dev,
233 unsigned size, uint32_t *input, enum bit_debug debug)
234 {
235 bi_instruction ins = bit_ins(BI_FMA, 3, nir_type_float, size);
236
237 for (unsigned outmod = 0; outmod < 4; ++outmod) {
238 for (unsigned inmod = 0; inmod < 8; ++inmod) {
239 ins.outmod = outmod;
240 ins.src_neg[0] = (inmod & 0x1);
241 ins.src_neg[1] = (inmod & 0x2);
242 ins.src_neg[2] = (inmod & 0x4);
243
244 if (!bit_test_single(dev, &ins, input, true, debug)) {
245 fprintf(stderr, "FAIL: fma%u%s.%u\n",
246 size,
247 outmod ? bi_output_mod_name(outmod) : ".none",
248 inmod);
249 }
250 }
251 }
252 }
253
254 static void
255 bit_fma_mscale_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
256 {
257 bi_instruction ins = bit_ins(BI_FMA, 4, nir_type_float, 32);
258 ins.op.mscale = true;
259 ins.src_types[3] = nir_type_int32;
260 ins.src[2] = ins.src[3]; /* Not enough ports! */
261
262 for (unsigned outmod = 0; outmod < 4; ++outmod) {
263 for (unsigned inmod = 0; inmod < 8; ++inmod) {
264 ins.outmod = outmod;
265 ins.src_abs[0] = (inmod & 0x1);
266 ins.src_neg[1] = (inmod & 0x2);
267 ins.src_neg[2] = (inmod & 0x4);
268
269 if (!bit_test_single(dev, &ins, input, true, debug)) {
270 fprintf(stderr, "FAIL: fma_mscale%s.%u\n",
271 outmod ? bi_output_mod_name(outmod) : ".none",
272 inmod);
273 }
274 }
275 }
276 }
277
278 static void
279 bit_csel_helper(struct panfrost_device *dev,
280 unsigned size, uint32_t *input, enum bit_debug debug)
281 {
282 bi_instruction ins = bit_ins(BI_CSEL, 4, nir_type_uint, size);
283
284 /* SCHEDULER: We can only read 3 registers at once. */
285 ins.src[2] = ins.src[0];
286
287 for (enum bi_cond cond = BI_COND_LT; cond <= BI_COND_NE; ++cond) {
288 ins.csel_cond = cond;
289
290 if (!bit_test_single(dev, &ins, input, true, debug)) {
291 fprintf(stderr, "FAIL: csel%u.%s\n",
292 size, bi_cond_name(cond));
293 }
294 }
295 }
296
297 static void
298 bit_special_helper(struct panfrost_device *dev,
299 unsigned size, uint32_t *input, enum bit_debug debug)
300 {
301 bi_instruction ins = bit_ins(BI_SPECIAL, 2, nir_type_float, size);
302 uint32_t exp_input[4];
303
304 for (enum bi_special_op op = BI_SPECIAL_FRCP; op <= BI_SPECIAL_EXP2_LOW; ++op) {
305 if (op == BI_SPECIAL_EXP2_LOW) {
306 /* exp2 only supported in fp32 mode */
307 if (size != 32)
308 continue;
309
310 /* Give expected input */
311 exp_input[1] = input[0];
312 float *ff = (float *) input;
313 exp_input[0] = (int) (ff[0] * (1 << 24));
314 }
315
316 for (unsigned c = 0; c < ((size == 16) ? 2 : 1); ++c) {
317 ins.op.special = op;
318 ins.swizzle[0][0] = c;
319
320 if (!bit_test_single(dev, &ins,
321 op == BI_SPECIAL_EXP2_LOW ? exp_input : input,
322 false, debug)) {
323 fprintf(stderr, "FAIL: special%u.%s\n",
324 size, bi_special_op_name(op));
325 }
326 }
327 }
328 }
329
330 static void
331 bit_table_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
332 {
333 bi_instruction ins = bit_ins(BI_TABLE, 1, nir_type_float, 32);
334
335 for (enum bi_table_op op = 0; op <= BI_TABLE_LOG2_U_OVER_U_1_LOW; ++op) {
336 ins.op.table = op;
337
338 if (!bit_test_single(dev, &ins, input, false, debug)) {
339 fprintf(stderr, "FAIL: table.%s\n",
340 bi_table_op_name(op));
341 }
342 }
343 }
344
345 static void
346 bit_frexp_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
347 {
348 bi_instruction ins = bit_ins(BI_FREXP, 1, nir_type_float, 32);
349 ins.dest_type = nir_type_int32;
350
351 for (enum bi_frexp_op op = 0; op <= BI_FREXPE_LOG; ++op) {
352 ins.op.frexp = op;
353
354 if (!bit_test_single(dev, &ins, input, true, debug)) {
355 fprintf(stderr, "FAIL: frexp.%s\n",
356 bi_frexp_op_name(op));
357 }
358 }
359 }
360
361 static void
362 bit_reduce_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
363 {
364 bi_instruction ins = bit_ins(BI_REDUCE_FMA, 2, nir_type_float, 32);
365
366 for (enum bi_reduce_op op = 0; op <= BI_REDUCE_ADD_FREXPM; ++op) {
367 ins.op.reduce = op;
368
369 if (!bit_test_single(dev, &ins, input, true, debug)) {
370 fprintf(stderr, "FAIL: reduce.%s\n",
371 bi_reduce_op_name(op));
372 }
373 }
374 }
375
376 static void
377 bit_convert_helper(struct panfrost_device *dev, unsigned from_size,
378 unsigned to_size, unsigned cx, unsigned cy, bool FMA,
379 enum bifrost_roundmode roundmode,
380 uint32_t *input, enum bit_debug debug)
381 {
382 bi_instruction ins = {
383 .type = BI_CONVERT,
384 .dest = BIR_INDEX_REGISTER | 0,
385 .src = { BIR_INDEX_REGISTER | 0 }
386 };
387
388 nir_alu_type Ts[3] = { nir_type_float, nir_type_uint, nir_type_int };
389
390 for (unsigned from_base = 0; from_base < 3; ++from_base) {
391 for (unsigned to_base = 0; to_base < 3; ++to_base) {
392 /* Discard invalid combinations.. */
393 if ((from_size == to_size) && (from_base == to_base))
394 continue;
395
396 /* Can't switch signedness */
397 if (from_base && to_base)
398 continue;
399
400 /* No F16_TO_I32, etc */
401 if (from_size != to_size && from_base == 0 && to_base)
402 continue;
403
404 if (from_size != to_size && from_base && to_base == 0)
405 continue;
406
407 /* No need, just ignore the upper half */
408 if (from_size > to_size && from_base == to_base && from_base)
409 continue;
410
411 ins.dest_type = Ts[to_base] | to_size;
412 ins.src_types[0] = Ts[from_base] | from_size;
413 ins.roundmode = roundmode;
414 ins.swizzle[0][0] = cx;
415 ins.swizzle[0][1] = cy;
416
417 if (!bit_test_single(dev, &ins, input, FMA, debug)) {
418 fprintf(stderr, "FAIL: convert.%u-%u.%u-%u.%u%u\n",
419 from_base, from_size,
420 to_base, to_size,
421 cx, cy);
422 }
423 }
424 }
425 }
426
427 static void
428 bit_constant_helper(struct panfrost_device *dev,
429 uint32_t *input, enum bit_debug debug)
430 {
431 enum bi_class C[3] = { BI_MOV, BI_ADD, BI_FMA };
432
433 for (unsigned doubled = 0; doubled < 2; ++doubled) {
434 for (unsigned count = 1; count <= 3; ++count) {
435 bi_instruction ins = bit_ins(C[count - 1], count, nir_type_float, 32);
436
437 ins.src[0] = BIR_INDEX_CONSTANT | 0;
438 ins.src[1] = (count >= 2) ? BIR_INDEX_CONSTANT | (doubled ? 32 : 0) : 0;
439 ins.src[2] = (count >= 3) ? BIR_INDEX_ZERO : 0;
440
441 ins.constant.u64 = doubled ?
442 0x3f800000ull | (0x3f000000ull << 32ull) :
443 0x3f800000ull;
444
445 if (!bit_test_single(dev, &ins, input, true, debug)) {
446 fprintf(stderr, "FAIL: constants.%s.%u\n",
447 doubled ? "two" : "one",
448 count);
449 }
450 }
451 }
452 }
453
454 void
455 bit_packing(struct panfrost_device *dev, enum bit_debug debug)
456 {
457 float input32[4];
458 uint16_t input16[8];
459
460 bit_generate_float4(input32);
461 bit_generate_half8(input16);
462
463 bit_constant_helper(dev, (uint32_t *) input32, debug);
464
465 for (unsigned sz = 16; sz <= 32; sz *= 2) {
466 uint32_t *input =
467 (sz == 16) ? (uint32_t *) input16 :
468 (uint32_t *) input32;
469
470 bit_fmod_helper(dev, BI_ADD, sz, true, input, debug, 0);
471 bit_fmod_helper(dev, BI_ADD, sz, false, input, debug, 0);
472
473 bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MIN);
474 bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MAX);
475
476 bit_fma_helper(dev, sz, input, debug);
477 }
478
479 for (unsigned sz = 32; sz <= 32; sz *= 2)
480 bit_csel_helper(dev, sz, (uint32_t *) input32, debug);
481
482 float special[4] = { 0.9 };
483 uint32_t special16[4] = { _mesa_float_to_half(special[0]) | (_mesa_float_to_half(0.2) << 16) };
484
485 bit_table_helper(dev, (uint32_t *) special, debug);
486
487 for (unsigned sz = 16; sz <= 32; sz *= 2) {
488 uint32_t *input =
489 (sz == 16) ? special16 :
490 (uint32_t *) special;
491
492 bit_special_helper(dev, sz, input, debug);
493 }
494
495 for (unsigned rm = 0; rm < 4; ++rm) {
496 bit_convert_helper(dev, 32, 32, 0, 0, false, rm, (uint32_t *) input32, debug);
497
498 for (unsigned c = 0; c < 2; ++c)
499 bit_convert_helper(dev, 32, 16, c, 0, false, rm, (uint32_t *) input32, debug);
500
501 bit_convert_helper(dev, 16, 32, 0, 0, false, rm, (uint32_t *) input16, debug);
502
503 for (unsigned c = 0; c < 4; ++c)
504 bit_convert_helper(dev, 16, 16, c & 1, c >> 1, false, rm, (uint32_t *) input16, debug);
505 }
506
507 bit_frexp_helper(dev, (uint32_t *) input32, debug);
508 bit_reduce_helper(dev, (uint32_t *) input32, debug);
509
510 uint32_t mscale_input[4];
511 memcpy(mscale_input, input32, sizeof(input32));
512 mscale_input[3] = 0x7;
513 bit_fma_mscale_helper(dev, mscale_input, debug);
514 }