/* src/panfrost/bifrost/bifrost_compile.c */
/*
 * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "compiler/nir/nir_builder.h"
#include "bifrost_compile.h"
#include "bifrost_opts.h"
#include "bifrost_sched.h"
#include "compiler_defines.h"
#include "disassemble.h"
#include "bifrost_print.h"

#define BI_DEBUG

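/*
 * NIR -> Bifrost MIR translation. NIR is first run through a small
 * optimisation loop, then each function body is lowered block by block
 * into bifrost_instructions, which are scheduled and (when BI_DEBUG is
 * set) disassembled at the end of bifrost_compile_shader_nir().
 */
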
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}

static void
optimize_nir(nir_shader *nir)
{
        bool progress;

        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);

        do {
                progress = false;

                NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0);

                NIR_PASS(progress, nir, nir_lower_var_copies);
                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

                NIR_PASS(progress, nir, nir_copy_prop);
                NIR_PASS(progress, nir, nir_opt_constant_folding);

                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
                NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
                NIR_PASS(progress, nir, nir_opt_if, true);

        } while (progress);

        NIR_PASS(progress, nir, nir_copy_prop);
        NIR_PASS(progress, nir, nir_opt_dce);
}

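/*
 * NIR SSA defs and NIR registers are folded into a single flat index
 * space: SSA defs keep their own index, while registers are placed
 * after all SSA defs by offsetting them with impl->ssa_alloc.
 */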
static unsigned
nir_src_index(compiler_context *ctx, nir_src *src)
{
        if (src->is_ssa)
                return src->ssa->index;
        else
                return ctx->func->impl->ssa_alloc + src->reg.reg->index;
}

static unsigned
nir_dest_index(compiler_context *ctx, nir_dest *dst)
{
        if (dst->is_ssa)
                return dst->ssa.index;
        else
                return ctx->func->impl->ssa_alloc + dst->reg.reg->index;
}

static unsigned
nir_alu_src_index(compiler_context *ctx, nir_alu_src *src)
{
        return nir_src_index(ctx, &src->src);
}

struct bifrost_instruction *
mir_alloc_ins(struct bifrost_instruction instr)
{
        struct bifrost_instruction *heap_ins = malloc(sizeof(instr));
        memcpy(heap_ins, &instr, sizeof(instr));
        return heap_ins;
}

static void
emit_mir_instruction(struct compiler_context *ctx, struct bifrost_instruction instr)
{
        list_addtail(&(mir_alloc_ins(instr))->link, &ctx->current_block->instructions);
}

static void
bifrost_block_add_successor(bifrost_block *block, bifrost_block *successor)
{
        assert(block->num_successors < ARRAY_SIZE(block->successors));
        block->successors[block->num_successors++] = successor;
}

static void
emit_load_const(struct compiler_context *ctx, nir_load_const_instr *instr)
{
        nir_ssa_def def = instr->def;

        /* Allocate one float per component so nir_const_value_to_array()
         * doesn't write out of bounds for vector constants. */
        float *v = ralloc_array(NULL, float, instr->def.num_components);
        nir_const_value_to_array(v, instr->value, instr->def.num_components, f32);
        _mesa_hash_table_u64_insert(ctx->ssa_constants, def.index + 1, v);
}

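/*
 * MIR temporaries are allocated from a separate counter and wrapped with
 * SSA_TEMP_VALUE() so that compiler-generated values stay distinct from
 * the NIR SSA and register indices produced above.
 */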
static uint32_t
alloc_mir_temp(struct compiler_context *ctx)
{
        return SSA_TEMP_VALUE(ctx->mir_temp++);
}

static uint32_t
emit_ld_vary_addr_constant(struct compiler_context *ctx, uint32_t location)
{
        // LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12
        // ...
        // ST_VAR.v4 T1, R12, R13, R14, R4

        // R61-R62 are filled with the information needed for varying interpolation
        // This loads a vec3 with the information that ST_VAR needs to work

        uint32_t mir_temp_location = alloc_mir_temp(ctx);
        // This instruction loads a vec3 starting from the initial register
        struct bifrost_instruction instr = {
                .op = op_ld_var_addr,
                .dest_components = 3,
                .ssa_args = {
                        .dest = mir_temp_location,
                        .src0 = SSA_FIXED_REGISTER(61),
                        .src1 = SSA_FIXED_REGISTER(62),
                        .src2 = SSA_INVALID_VALUE,
                        .src3 = SSA_INVALID_VALUE,
                },
                .literal_args[0] = location,
        };
        emit_mir_instruction(ctx, instr);

        return mir_temp_location;
}

// XXX: Doesn't support duplicated values in the components!
// RA WILL fail!
static void
emit_create_vector(struct compiler_context *ctx, unsigned dest, unsigned num_comps, uint32_t *comps)
{
        assert(num_comps <= 4 && "Can't make a vector larger than 4 components");

        // This instruction gathers up to four scalar sources into one vector destination
        struct bifrost_instruction instr = {
                .op = op_create_vector,
                .dest_components = num_comps,
                .ssa_args = {
                        .dest = dest,
                }
        };

        uint32_t *srcs[4] = {
                &instr.ssa_args.src0,
                &instr.ssa_args.src1,
                &instr.ssa_args.src2,
                &instr.ssa_args.src3,
        };

        for (unsigned i = 0; i < 4; ++i) {
                if (i < num_comps)
                        *srcs[i] = comps[i];
                else
                        *srcs[i] = SSA_INVALID_VALUE;
        }
        emit_mir_instruction(ctx, instr);
}

static uint32_t
emit_extract_vector_element(struct compiler_context *ctx, unsigned ssa_vector, unsigned element)
{
        uint32_t mir_temp_location = alloc_mir_temp(ctx);
        // This instruction extracts a single element from a vector source
        struct bifrost_instruction instr = {
                .op = op_extract_element,
                .dest_components = 1,
                .ssa_args = {
                        .dest = mir_temp_location,
                        .src0 = ssa_vector,
                        .src1 = SSA_INVALID_VALUE,
                        .src2 = SSA_INVALID_VALUE,
                        .src3 = SSA_INVALID_VALUE,
                },
                .literal_args[0] = element,
        };
        emit_mir_instruction(ctx, instr);

        return mir_temp_location;
}

static uint32_t
emit_movi(struct compiler_context *ctx, uint32_t literal)
{
        uint32_t mir_temp_location = alloc_mir_temp(ctx);
        // This instruction moves an immediate literal into a scalar temporary
        struct bifrost_instruction instr = {
                .op = op_movi,
                .dest_components = 1,
                .ssa_args = {
                        .dest = mir_temp_location,
                        .src0 = SSA_INVALID_VALUE,
                        .src1 = SSA_INVALID_VALUE,
                        .src2 = SSA_INVALID_VALUE,
                        .src3 = SSA_INVALID_VALUE,
                },
                .literal_args[0] = literal,
        };
        emit_mir_instruction(ctx, instr);

        return mir_temp_location;
}

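/*
 * Example: for an ALU source written as ssa_7.y, the first channel used
 * has swizzle[c] == 1, so we emit extract_element(ssa_7, 1) and hand the
 * resulting scalar temporary to the ALU op.
 */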
static unsigned
nir_alu_src_index_scalar(compiler_context *ctx, nir_alu_instr *nir_instr, unsigned src)
{
        // NIR uses a combination of single channels plus swizzles to determine which component is pulled out of a source
        for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
                if (!nir_alu_instr_channel_used(nir_instr, src, c))
                        continue;
                // Pull the swizzle from this element that is active and use it as the source
                unsigned element = nir_instr->src[src].swizzle[c];

                // Create an op that extracts an element from a vector
                return emit_extract_vector_element(ctx, nir_alu_src_index(ctx, &nir_instr->src[src]), element);
        }
        assert(0);
        return 0;
}

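/*
 * Lower the NIR intrinsics the backend currently understands: UBO loads,
 * SSBO stores, uniform and attribute loads, and output stores. Anything
 * else is reported and skipped.
 */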
static void
emit_intrinsic(struct compiler_context *ctx, nir_intrinsic_instr *nir_instr)
{
        nir_const_value *const_offset;
        unsigned offset, reg;

        switch (nir_instr->intrinsic) {
        case nir_intrinsic_load_ubo: {
                nir_const_value *location = nir_src_as_const_value(nir_instr->src[0]);
                const_offset = nir_src_as_const_value(nir_instr->src[1]);
                assert (location && "no indirect ubo selection");
                assert (const_offset && "no indirect inputs");

                enum bifrost_ir_ops op;

                // load_ubo <UBO binding>, <byte offset>
                // ld_ubo <byte offset>, <UBO binding>
                switch (nir_dest_num_components(nir_instr->dest)) {
                case 1:
                        op = op_ld_ubo_v1;
                        break;
                case 2:
                        op = op_ld_ubo_v2;
                        break;
                case 3:
                        op = op_ld_ubo_v3;
                        break;
                case 4:
                        op = op_ld_ubo_v4;
                        break;
                default:
                        assert(0);
                        break;
                }

                reg = nir_dest_index(ctx, &nir_instr->dest);
                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = nir_dest_num_components(nir_instr->dest),
                        .ssa_args = {
                                .dest = reg,
                                .src0 = SSA_INVALID_VALUE,
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .literal_args[0] = nir_src_as_uint(nir_instr->src[1]),
                        .literal_args[1] = nir_src_as_uint(nir_instr->src[0]),
                };

                emit_mir_instruction(ctx, instr);
                break;
        }
        case nir_intrinsic_store_ssbo: {
                nir_const_value *location = nir_src_as_const_value(nir_instr->src[1]);
                const_offset = nir_src_as_const_value(nir_instr->src[2]);
                assert (location && "no indirect ssbo selection");
                assert (const_offset && "no indirect inputs");

                // store_ssbo <Value>, <binding>, <offset>
                // store_vN <Addr>, <Value>
                reg = nir_src_index(ctx, &nir_instr->src[0]);

                enum bifrost_ir_ops op;
                switch (nir_src_num_components(nir_instr->src[0])) {
                case 1:
                        op = op_store_v1;
                        break;
                case 2:
                        op = op_store_v2;
                        break;
                case 3:
                        op = op_store_v3;
                        break;
                case 4:
                        op = op_store_v4;
                        break;
                default:
                        assert(0);
                        break;
                }

                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = 0,
                        .ssa_args = {
                                .dest = SSA_INVALID_VALUE,
                                .src0 = reg,
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .literal_args[0] = nir_src_as_uint(nir_instr->src[2]),
                };
                emit_mir_instruction(ctx, instr);
                break;
        }
        case nir_intrinsic_load_uniform:
                offset = nir_intrinsic_base(nir_instr);

                if (nir_src_is_const(nir_instr->src[0])) {
                        offset += nir_src_as_uint(nir_instr->src[0]);
                } else {
                        assert(0 && "Can't handle indirect load_uniform");
                }

                reg = nir_dest_index(ctx, &nir_instr->dest);

                unsigned num_components = nir_dest_num_components(nir_instr->dest);
                if (num_components == 1) {
                        struct bifrost_instruction instr = {
                                .op = op_mov,
                                .dest_components = 1,
                                .ssa_args = {
                                        .dest = reg,
                                        .src0 = SSA_FIXED_UREGISTER(offset),
                                        .src1 = SSA_INVALID_VALUE,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                },
                        };
                        emit_mir_instruction(ctx, instr);
                } else {
                        uint32_t comps[4];

                        for (unsigned i = 0; i < nir_dest_num_components(nir_instr->dest); ++i) {
                                uint32_t temp_dest = alloc_mir_temp(ctx);
                                comps[i] = temp_dest;
                                struct bifrost_instruction instr = {
                                        .op = op_mov,
                                        .dest_components = 1,
                                        .ssa_args = {
                                                .dest = temp_dest,
                                                .src0 = SSA_FIXED_UREGISTER(offset + (i * 4)),
                                                .src1 = SSA_INVALID_VALUE,
                                                .src2 = SSA_INVALID_VALUE,
                                                .src3 = SSA_INVALID_VALUE,
                                        },
                                };
                                emit_mir_instruction(ctx, instr);
                        }

                        emit_create_vector(ctx, reg, num_components, comps);
                }
                break;

        case nir_intrinsic_load_input: {
                const_offset = nir_src_as_const_value(nir_instr->src[0]);
                assert (const_offset && "no indirect inputs");

                offset = nir_intrinsic_base(nir_instr) + nir_src_as_uint(nir_instr->src[0]);

                reg = nir_dest_index(ctx, &nir_instr->dest);

                enum bifrost_ir_ops op;
                switch (nir_dest_num_components(nir_instr->dest)) {
                case 1:
                        op = op_ld_attr_v1;
                        break;
                case 2:
                        op = op_ld_attr_v2;
                        break;
                case 3:
                        op = op_ld_attr_v3;
                        break;
                case 4:
                        op = op_ld_attr_v4;
                        break;
                default:
                        assert(0);
                        break;
                }

                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = nir_dest_num_components(nir_instr->dest),
                        .ssa_args = {
                                .dest = reg,
                                .src0 = offset,
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        }
                };

                emit_mir_instruction(ctx, instr);
                break;
        }
        case nir_intrinsic_store_output: {
                const_offset = nir_src_as_const_value(nir_instr->src[1]);
                assert(const_offset && "no indirect outputs");

                offset = nir_intrinsic_base(nir_instr);
                if (ctx->stage == MESA_SHADER_FRAGMENT) {
                        int comp = nir_intrinsic_component(nir_instr);
                        offset += comp;
                        // XXX: Once we support more than colour output, this will need to change
                        void *entry = _mesa_hash_table_u64_search(ctx->outputs_nir_to_bi, offset + FRAG_RESULT_DATA0 + 1);

                        if (!entry) {
                                printf("WARNING: skipping fragment output\n");
                                break;
                        }

                        offset = (uintptr_t) (entry) - 1;
                        reg = nir_src_index(ctx, &nir_instr->src[0]);

                        enum bifrost_ir_ops op;
                        switch (nir_src_num_components(nir_instr->src[0])) {
                        case 1:
                                op = op_store_v1;
                                break;
                        case 2:
                                op = op_store_v2;
                                break;
                        case 3:
                                op = op_store_v3;
                                break;
                        case 4:
                                op = op_store_v4;
                                break;
                        default:
                                assert(0);
                                break;
                        }

                        // XXX: Not all offsets are vec4 aligned, so this will need adjusting in the future
                        // XXX: This needs to offset correctly into memory so the blend step can pick it up
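                        // The slot index is scaled by 16 here, presumably to turn a
                        // vec4 output slot into a byte offset (4 components x 4 bytes).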
                        uint32_t movi = emit_movi(ctx, offset * 16);
                        uint32_t movi2 = emit_movi(ctx, 0);

                        uint32_t comps[2] = {
                                movi, movi2,
                        };
                        uint32_t offset_val = alloc_mir_temp(ctx);
                        emit_create_vector(ctx, offset_val, 2, comps);

                        struct bifrost_instruction instr = {
                                .op = op,
                                .dest_components = 0,
                                .ssa_args = {
                                        .dest = SSA_INVALID_VALUE,
                                        .src0 = offset_val,
                                        .src1 = reg,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                }
                        };
                        emit_mir_instruction(ctx, instr);
                } else if (ctx->stage == MESA_SHADER_VERTEX) {
                        int comp = nir_intrinsic_component(nir_instr);
                        offset += comp;
                        void *entry = _mesa_hash_table_u64_search(ctx->varying_nir_to_bi, offset + 2);

                        if (!entry) {
                                printf("WARNING: skipping varying\n");
                                break;
                        }

                        offset = (uintptr_t) (entry) - 1;

                        reg = nir_src_index(ctx, &nir_instr->src[0]);
                        // LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12
                        // ...
                        // ST_VAR.v4 T1, R12, R13, R14, R4

                        offset = emit_ld_vary_addr_constant(ctx, offset);
                        enum bifrost_ir_ops op;
                        switch (nir_src_num_components(nir_instr->src[0])) {
                        case 1:
                                op = op_st_vary_v1;
                                break;
                        case 2:
                                op = op_st_vary_v2;
                                break;
                        case 3:
                                op = op_st_vary_v3;
                                break;
                        case 4:
                                op = op_st_vary_v4;
                                break;
                        default:
                                assert(0);
                                break;
                        }

                        struct bifrost_instruction instr = {
                                .op = op,
                                .dest_components = 0,
                                .ssa_args = {
                                        .dest = SSA_INVALID_VALUE,
                                        .src0 = offset,
                                        .src1 = reg,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                }
                        };
                        emit_mir_instruction(ctx, instr);
                } else {
                        assert(0 && "Unknown store_output stage");
                }
                break;
        }
        default:
                printf ("Unhandled intrinsic %s\n", nir_intrinsic_infos[nir_instr->intrinsic].name);
                break;
        }
}

#define ALU_CASE(arguments, nir, name) \
        case nir_op_##nir: \
                argument_count = arguments; \
                op = op_##name; \
                break
#define ALU_CASE_MOD(arguments, nir, name, modifiers) \
        case nir_op_##nir: \
                argument_count = arguments; \
                op = op_##name; \
                src_modifiers = modifiers; \
                break

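/*
 * For example, ALU_CASE(2, fmul, fmul_f32) expands to:
 *
 *     case nir_op_fmul:
 *             argument_count = 2;
 *             op = op_fmul_f32;
 *             break;
 */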
static void
emit_alu(struct compiler_context *ctx, nir_alu_instr *nir_instr)
{
        unsigned dest = nir_dest_index(ctx, &nir_instr->dest.dest);
        unsigned op = ~0U, argument_count;
        unsigned src_modifiers = 0;

        switch (nir_instr->op) {
        ALU_CASE(2, fmul, fmul_f32);
        ALU_CASE(2, fadd, fadd_f32);
        ALU_CASE_MOD(2, fsub, fadd_f32, SOURCE_MODIFIER(1, SRC_MOD_NEG));
        ALU_CASE(1, ftrunc, trunc);
        ALU_CASE(1, fceil, ceil);
        ALU_CASE(1, ffloor, floor);
        ALU_CASE(1, fround_even, roundeven);
        ALU_CASE(1, frcp, frcp_fast_f32);
        ALU_CASE(2, fmax, max_f32);
        ALU_CASE(2, fmin, min_f32);
        ALU_CASE(2, iadd, add_i32);
        ALU_CASE(2, isub, sub_i32);
        ALU_CASE(2, imul, mul_i32);
        ALU_CASE(2, iand, and_i32);
        ALU_CASE(2, ior, or_i32);
        ALU_CASE(2, ixor, xor_i32);
        ALU_CASE(2, ishl, lshift_i32);
        ALU_CASE(2, ushr, rshift_i32);
        ALU_CASE(2, ishr, arshift_i32);
        case nir_op_ineg: {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                printf("ineg 0x%08x\n", src0);
                struct bifrost_instruction instr = {
                        .op = op_sub_i32,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = SSA_FIXED_CONST_0,
                                .src1 = src0,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                };

                emit_mir_instruction(ctx, instr);
                return;

        }
        case nir_op_vec2: {
                uint32_t comps[2] = {
                        nir_alu_src_index(ctx, &nir_instr->src[0]),
                        nir_alu_src_index(ctx, &nir_instr->src[1]),
                };
                emit_create_vector(ctx, dest, 2, comps);
                return;
                break;
        }
        case nir_op_vec3: {
                uint32_t comps[3] = {
                        nir_alu_src_index(ctx, &nir_instr->src[0]),
                        nir_alu_src_index(ctx, &nir_instr->src[1]),
                        nir_alu_src_index(ctx, &nir_instr->src[2]),
                };
                emit_create_vector(ctx, dest, 3, comps);
                return;
                break;
        }
        case nir_op_vec4: {
                uint32_t comps[4] = {
                        nir_alu_src_index(ctx, &nir_instr->src[0]),
                        nir_alu_src_index(ctx, &nir_instr->src[1]),
                        nir_alu_src_index(ctx, &nir_instr->src[2]),
                        nir_alu_src_index(ctx, &nir_instr->src[3]),
                };
                emit_create_vector(ctx, dest, 4, comps);
                return;
                break;
        }
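        // fdiv has no direct opcode here: it is lowered to a fast reciprocal
        // of the divisor followed by a multiply, i.e. dest = src0 * (1 / src1).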
        case nir_op_fdiv: {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                uint32_t mir_temp_location = alloc_mir_temp(ctx);
                {
                        struct bifrost_instruction instr = {
                                .op = op_frcp_fast_f32,
                                .dest_components = 1,
                                .ssa_args = {
                                        .dest = mir_temp_location,
                                        .src0 = src1,
                                        .src1 = SSA_INVALID_VALUE,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                },
                        };
                        emit_mir_instruction(ctx, instr);
                }

                // Multiply src0 by the reciprocal of src1 computed above
                struct bifrost_instruction instr = {
                        .op = op_fmul_f32,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = src0,
                                .src1 = mir_temp_location,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .src_modifiers = src_modifiers,
                };

                emit_mir_instruction(ctx, instr);
                return;
                break;
        }
        case nir_op_umin:
        case nir_op_imin:
        case nir_op_umax:
        case nir_op_imax: {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                struct bifrost_instruction instr = {
                        .op = op_csel_i32,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = src0,
                                .src1 = src1,
                                .src2 = src0,
                                .src3 = src1,
                        },
                        .src_modifiers = src_modifiers,
                        .literal_args[0] = 0, /* XXX: Comparison operator */
                };

                emit_mir_instruction(ctx, instr);
                return;
                break;
        }
        case nir_op_umin3:
        case nir_op_imin3:
        case nir_op_umax3:
        case nir_op_imax3: {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                unsigned src2 = nir_alu_src_index_scalar(ctx, nir_instr, 2);

                unsigned op = 0;
                if (nir_instr->op == nir_op_umin3)
                        op = op_umin3_i32;
                else if (nir_instr->op == nir_op_imin3)
                        op = op_imin3_i32;
                else if (nir_instr->op == nir_op_umax3)
                        op = op_umax3_i32;
                else if (nir_instr->op == nir_op_imax3)
                        op = op_imax3_i32;
                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = src0,
                                .src1 = src1,
                                .src2 = src2,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .src_modifiers = src_modifiers,
                };

                emit_mir_instruction(ctx, instr);

                return;
                break;
        }
        case nir_op_ine: {
                uint32_t movi = emit_movi(ctx, ~0U);
                unsigned src0 = nir_alu_src_index(ctx, &nir_instr->src[0]);
                unsigned src1 = nir_alu_src_index(ctx, &nir_instr->src[1]);
                struct bifrost_instruction instr = {
                        .op = op_csel_i32,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = src0,
                                .src1 = src1,
                                .src2 = movi,
                                .src3 = SSA_FIXED_CONST_0,
                        },
                        .src_modifiers = src_modifiers,
                        .literal_args[0] = CSEL_IEQ, /* XXX: Comparison operator */
                };

                emit_mir_instruction(ctx, instr);
                return;
                break;
        }
        default:
                printf("Unhandled ALU op %s\n", nir_op_infos[nir_instr->op].name);
                return;
        }

        unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
        unsigned src1 = argument_count >= 2 ? nir_alu_src_index_scalar(ctx, nir_instr, 1) : SSA_INVALID_VALUE;
        unsigned src2 = argument_count >= 3 ? nir_alu_src_index_scalar(ctx, nir_instr, 2) : SSA_INVALID_VALUE;
        unsigned src3 = argument_count >= 4 ? nir_alu_src_index_scalar(ctx, nir_instr, 3) : SSA_INVALID_VALUE;

        struct bifrost_instruction instr = {
                .op = op,
                .dest_components = 1,
                .ssa_args = {
                        .dest = dest,
                        .src0 = src0,
                        .src1 = src1,
                        .src2 = src2,
                        .src3 = src3,
                },
                .src_modifiers = src_modifiers,
        };

        emit_mir_instruction(ctx, instr);
}

static void
emit_instr(struct compiler_context *ctx, struct nir_instr *instr)
{
        switch (instr->type) {
        case nir_instr_type_load_const:
                emit_load_const(ctx, nir_instr_as_load_const(instr));
                break;
        case nir_instr_type_intrinsic:
                emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
                break;
        case nir_instr_type_alu:
                emit_alu(ctx, nir_instr_as_alu(instr));
                break;
        case nir_instr_type_tex:
                printf("Unhandled NIR inst tex\n");
                break;
        case nir_instr_type_jump:
                printf("Unhandled NIR inst jump\n");
                break;
        case nir_instr_type_ssa_undef:
                printf("Unhandled NIR inst ssa_undef\n");
                break;
        default:
                printf("Unhandled instruction type\n");
                break;
        }

}

static bifrost_block *
emit_block(struct compiler_context *ctx, nir_block *block)
{
        bifrost_block *this_block = calloc(sizeof(bifrost_block), 1);
        list_addtail(&this_block->link, &ctx->blocks);

        ++ctx->block_count;

        /* Add this block to be a successor to the previous block */
        if (ctx->current_block)
                bifrost_block_add_successor(ctx->current_block, this_block);

        /* Set up current block */
        list_inithead(&this_block->instructions);
        ctx->current_block = this_block;

        nir_foreach_instr(instr, block) {
                emit_instr(ctx, instr);
                ++ctx->instruction_count;
        }

#ifdef BI_DEBUG
        print_mir_block(this_block, false);
#endif
        return this_block;
}

void
emit_if(struct compiler_context *ctx, nir_if *nir_inst);

static struct bifrost_block *
emit_cf_list(struct compiler_context *ctx, struct exec_list *list)
{
        struct bifrost_block *start_block = NULL;
        foreach_list_typed(nir_cf_node, node, node, list) {
                switch (node->type) {
                case nir_cf_node_block: {
                        bifrost_block *block = emit_block(ctx, nir_cf_node_as_block(node));

                        if (!start_block)
                                start_block = block;

                        break;
                }

                case nir_cf_node_if:
                        emit_if(ctx, nir_cf_node_as_if(node));
                        break;

                default:
                case nir_cf_node_loop:
                case nir_cf_node_function:
                        assert(0);
                        break;
                }
        }

        return start_block;
}

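/*
 * Lower a NIR if: a conditional branch is emitted before the then-block,
 * an unconditional branch at the end of the then-block jumps over the
 * else-block, and both branch targets are patched once the block indices
 * of the else-block and the if-footer are known.
 */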
void
emit_if(struct compiler_context *ctx, nir_if *nir_inst)
{

        // XXX: Conditional branch instruction can do a variety of comparisons with the sources
        // Merge the source instruction `ine` with our conditional branch
        {
                uint32_t movi = emit_movi(ctx, ~0U);
                struct bifrost_instruction instr = {
                        .op = op_branch,
                        .dest_components = 0,
                        .ssa_args = {
                                .dest = SSA_INVALID_VALUE,
                                .src0 = nir_src_index(ctx, &nir_inst->condition),
                                .src1 = movi,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .src_modifiers = 0,
                        .literal_args[0] = BR_COND_EQ, /* XXX: Comparison Arg type */
                        .literal_args[1] = 0, /* XXX: Branch target */
                };

                emit_mir_instruction(ctx, instr);
        }

        bifrost_instruction *true_branch = mir_last_instr_in_block(ctx->current_block);

        bifrost_block *true_block = emit_cf_list(ctx, &nir_inst->then_list);

        {
                struct bifrost_instruction instr = {
                        .op = op_branch,
                        .dest_components = 0,
                        .ssa_args = {
                                .dest = SSA_INVALID_VALUE,
                                .src0 = SSA_INVALID_VALUE,
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .src_modifiers = 0,
                        .literal_args[0] = BR_ALWAYS, /* XXX: ALWAYS */
                        .literal_args[1] = 0, /* XXX: Branch target */
                };

                emit_mir_instruction(ctx, instr);
        }
        bifrost_instruction *true_exit_branch = mir_last_instr_in_block(ctx->current_block);

        unsigned false_idx = ctx->block_count;
        unsigned inst_count = ctx->instruction_count;

        bifrost_block *false_block = emit_cf_list(ctx, &nir_inst->else_list);

        unsigned if_footer_idx = ctx->block_count;
        assert(true_block);
        assert(false_block);

        if (ctx->instruction_count == inst_count) {
                // If the else branch didn't have anything in it then we can remove the dead jump
                mir_remove_instr(true_exit_branch);
        } else {
                true_exit_branch->literal_args[1] = if_footer_idx;
        }

        true_branch->literal_args[1] = false_idx;
}

int
bifrost_compile_shader_nir(nir_shader *nir, struct bifrost_program *program)
{
        struct compiler_context ictx = {
                .nir = nir,
                .stage = nir->info.stage,
        };

        struct compiler_context *ctx = &ictx;

        ctx->mir_temp = 0;

        /* Initialize the hash tables that live at a global (not per-block) level */
        ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
        ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);

        /* Assign actual uniform locations, skipping over samplers */
        ctx->uniform_nir_to_bi = _mesa_hash_table_u64_create(NULL);

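        /* Hash table values are stored as id + 1 so that a stored id of 0 can
         * be told apart from a missing entry; lookups subtract 1 again. */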
        nir_foreach_variable(var, &nir->uniforms) {
                if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue;

                for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) {
                        int id = ctx->uniform_count++;
                        _mesa_hash_table_u64_insert(ctx->uniform_nir_to_bi, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1)));
                }
        }

        if (ctx->stage == MESA_SHADER_VERTEX) {
                ctx->varying_nir_to_bi = _mesa_hash_table_u64_create(NULL);
                nir_foreach_variable(var, &nir->outputs) {
                        if (var->data.location < VARYING_SLOT_VAR0) {
                                if (var->data.location == VARYING_SLOT_POS)
                                        ctx->varying_count++;
                                _mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + 1, (void *) ((uintptr_t) (1)));

                                continue;
                        }

                        for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) {
                                for (int comp = 0; comp < 4; ++comp) {
                                        int id = comp + ctx->varying_count++;
                                        _mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + col + comp + 1, (void *) ((uintptr_t) (id + 1)));
                                }
                        }
                }

        } else if (ctx->stage == MESA_SHADER_FRAGMENT) {
                ctx->outputs_nir_to_bi = _mesa_hash_table_u64_create(NULL);
                nir_foreach_variable(var, &nir->outputs) {
                        if (var->data.location >= FRAG_RESULT_DATA0 && var->data.location <= FRAG_RESULT_DATA7) {
                                int id = ctx->outputs_count++;
                                printf("Driver location: %d with id %d\n", var->data.location + 1, id);
                                _mesa_hash_table_u64_insert(ctx->outputs_nir_to_bi, var->data.location + 1, (void *) ((uintptr_t) (id + 1)));
                        }
                }
        }

        /* Optimisation passes */
        optimize_nir(nir);

#ifdef BI_DEBUG
        nir_print_shader(nir, stdout);
#endif

        /* Generate machine IR for shader */
        nir_foreach_function(func, nir) {
                nir_builder _b;
                ctx->b = &_b;
                nir_builder_init(ctx->b, func->impl);

                list_inithead(&ctx->blocks);
                ctx->block_count = 0;
                ctx->func = func;

                emit_cf_list(ctx, &func->impl->body);

                break; // XXX: Only the first function is compiled until multi-function shaders are supported
        }

        util_dynarray_init(&program->compiled, NULL);

        // MIR pre-RA optimizations

        bool progress = false;

        do {
                progress = false;
                mir_foreach_block(ctx, block) {
                        // XXX: Not yet working
                        // progress |= bifrost_opt_branch_fusion(ctx, block);
                }
        } while (progress);

        schedule_program(ctx);

#ifdef BI_DEBUG
        nir_print_shader(nir, stdout);
        disassemble_bifrost(program->compiled.data, program->compiled.size, false);
#endif
        return 0;
}