pan/bi: Add dummy scheduler
[mesa.git] src/panfrost/bifrost/bifrost_compile.c
/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "main/mtypes.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/nir_types.h"
#include "main/imports.h"
#include "compiler/nir/nir_builder.h"

#include "disassemble.h"
#include "bifrost_compile.h"
#include "compiler.h"
#include "bi_quirks.h"
#include "bi_print.h"

static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list);
static bi_instruction *bi_emit_branch(bi_context *ctx);
static void bi_block_add_successor(bi_block *block, bi_block *successor);
static void bi_schedule_barrier(bi_context *ctx);

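/* NIR break/continue jumps become unconditional branches to the enclosing
 * loop's break or continue block, and the resulting CFG edge is recorded on
 * the current block. */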
static void
emit_jump(bi_context *ctx, nir_jump_instr *instr)
{
        bi_instruction *branch = bi_emit_branch(ctx);

        switch (instr->type) {
        case nir_jump_break:
                branch->branch.target = ctx->break_block;
                break;
        case nir_jump_continue:
                branch->branch.target = ctx->continue_block;
                break;
        default:
                unreachable("Unhandled jump type");
        }

        bi_block_add_successor(ctx->current_block, branch->branch.target);
}

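/* Loads a fragment varying with LD_VAR. Non-interpolated inputs are marked
 * flat; a constant I/O offset is folded into the location, while an indirect
 * offset becomes the first source. */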
static void
bi_emit_ld_vary(bi_context *ctx, nir_intrinsic_instr *instr)
{
        bi_instruction ins = {
                .type = BI_LOAD_VAR,
                .load_vary = {
                        .load = {
                                .location = nir_intrinsic_base(instr),
                                .channels = instr->num_components,
                        },
                        .interp_mode = BIFROST_INTERP_DEFAULT, /* TODO */
                        .reuse = false, /* TODO */
                        .flat = instr->intrinsic != nir_intrinsic_load_interpolated_input
                },
                .dest = bir_dest_index(&instr->dest),
                .dest_type = nir_type_float | nir_dest_bit_size(instr->dest),
        };

        nir_src *offset = nir_get_io_offset_src(instr);

        if (nir_src_is_const(*offset))
                ins.load_vary.load.location += nir_src_as_uint(*offset);
        else
                ins.src[0] = bir_src_index(offset);

        bi_emit(ctx, ins);
}

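/* Fragment colour writes: emit ATEST once before the first blend, then a
 * BLEND per store_output. Each is followed by bi_schedule_barrier() so it
 * ends the current block. */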
static void
bi_emit_frag_out(bi_context *ctx, nir_intrinsic_instr *instr)
{
        if (!ctx->emitted_atest) {
                bi_instruction ins = {
                        .type = BI_ATEST
                };

                bi_emit(ctx, ins);
                bi_schedule_barrier(ctx);
                ctx->emitted_atest = true;
        }

        bi_instruction blend = {
                .type = BI_BLEND,
                .blend_location = nir_intrinsic_base(instr),
                .src = {
                        bir_src_index(&instr->src[0])
                }
        };

        bi_emit(ctx, blend);
        bi_schedule_barrier(ctx);
}

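/* Builds the load descriptor (location and channel count) shared by the
 * direct I/O paths, folding the constant offset into the base location.
 * Indirect offsets are not handled here. */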
static struct bi_load
bi_direct_load_for_instr(nir_intrinsic_instr *instr)
{
        nir_src *offset = nir_get_io_offset_src(instr);
        assert(nir_src_is_const(*offset)); /* no indirects */

        struct bi_load load = {
                .location = nir_intrinsic_base(instr) + nir_src_as_uint(*offset),
                .channels = instr->num_components
        };

        return load;
}

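/* Vertex shader inputs become attribute loads (LD_ATTR), typed from the
 * intrinsic. */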
static void
bi_emit_ld_attr(bi_context *ctx, nir_intrinsic_instr *instr)
{
        bi_instruction load = {
                .type = BI_LOAD_ATTR,
                .load = bi_direct_load_for_instr(instr),
                .dest = bir_dest_index(&instr->dest),
                .dest_type = nir_intrinsic_type(instr)
        };

        bi_emit(ctx, load);
}

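/* Vertex shader outputs are stored in two steps: LD_VAR_ADDRESS computes the
 * varying address into a temporary, then STORE_VAR writes the value through
 * that address. */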
static void
bi_emit_st_vary(bi_context *ctx, nir_intrinsic_instr *instr)
{
        nir_src *offset = nir_get_io_offset_src(instr);
        assert(nir_src_is_const(*offset)); /* no indirects */

        bi_instruction address = {
                .type = BI_LOAD_VAR_ADDRESS,
                .load = bi_direct_load_for_instr(instr),
                .dest_type = nir_intrinsic_type(instr),
                .dest = bi_make_temp(ctx)
        };

        bi_instruction st = {
                .type = BI_STORE_VAR,
                .src = {
                        address.dest,
                        bir_src_index(&instr->src[0])
                }
        };

        bi_emit(ctx, address);
        bi_emit(ctx, st);
}

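/* Uniform loads become LOAD_UNIFORM. The zero source is a placeholder until
 * UBO indexing is wired up, and indirect access is still TODO (see the
 * comments below). */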
static void
bi_emit_ld_uniform(bi_context *ctx, nir_intrinsic_instr *instr)
{
        /* TODO: Indirect access */

        bi_instruction ld = {
                .type = BI_LOAD_UNIFORM,
                .load = bi_direct_load_for_instr(instr),
                .dest = bir_dest_index(&instr->dest),
                .dest_type = nir_intrinsic_type(instr),
                .src = {
                        BIR_INDEX_ZERO /* TODO: UBOs */
                }
        };

        bi_emit(ctx, ld);
}

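/* Dispatches intrinsics by kind and shader stage: input loads become varying
 * reads (fragment) or attribute reads (vertex), output stores become blends
 * (fragment) or varying stores (vertex). Unhandled intrinsics are skipped
 * for now. */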
static void
emit_intrinsic(bi_context *ctx, nir_intrinsic_instr *instr)
{

        switch (instr->intrinsic) {
        case nir_intrinsic_load_barycentric_pixel:
                /* stub */
                break;
        case nir_intrinsic_load_interpolated_input:
        case nir_intrinsic_load_input:
                if (ctx->stage == MESA_SHADER_FRAGMENT)
                        bi_emit_ld_vary(ctx, instr);
                else if (ctx->stage == MESA_SHADER_VERTEX)
                        bi_emit_ld_attr(ctx, instr);
                else {
                        unreachable("Unsupported shader stage");
                }
                break;

        case nir_intrinsic_store_output:
                if (ctx->stage == MESA_SHADER_FRAGMENT)
                        bi_emit_frag_out(ctx, instr);
                else if (ctx->stage == MESA_SHADER_VERTEX)
                        bi_emit_st_vary(ctx, instr);
                else
                        unreachable("Unsupported shader stage");
                break;

        case nir_intrinsic_load_uniform:
                bi_emit_ld_uniform(ctx, instr);
                break;

        default:
                /* todo */
                break;
        }
}

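/* Constants become a MOV from an inline constant source; prior scalarization
 * guarantees a single component. */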
static void
emit_load_const(bi_context *ctx, nir_load_const_instr *instr)
{
        /* Make sure we've been lowered */
        assert(instr->def.num_components == 1);

        bi_instruction move = {
                .type = BI_MOV,
                .dest = bir_ssa_index(&instr->def),
                .dest_type = instr->def.bit_size | nir_type_uint,
                .src = {
                        BIR_INDEX_CONSTANT
                },
                .constant = {
                        .u64 = nir_const_value_as_uint(instr->value[0], instr->def.bit_size)
                }
        };

        bi_emit(ctx, move);
}

static void
emit_instr(bi_context *ctx, struct nir_instr *instr)
{
        switch (instr->type) {
        case nir_instr_type_load_const:
                emit_load_const(ctx, nir_instr_as_load_const(instr));
                break;

        case nir_instr_type_intrinsic:
                emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
                break;

#if 0
        case nir_instr_type_alu:
                emit_alu(ctx, nir_instr_as_alu(instr));
                break;

        case nir_instr_type_tex:
                emit_tex(ctx, nir_instr_as_tex(instr));
                break;
#endif

        case nir_instr_type_jump:
                emit_jump(ctx, nir_instr_as_jump(instr));
                break;

        case nir_instr_type_ssa_undef:
                /* Spurious */
                break;

        default:
                //unreachable("Unhandled instruction type");
                break;
        }
}

static bi_block *
create_empty_block(bi_context *ctx)
{
        bi_block *blk = rzalloc(ctx, bi_block);

        blk->predecessors = _mesa_set_create(blk,
                        _mesa_hash_pointer,
                        _mesa_key_pointer_equal);

        blk->name = ctx->block_name_count++;

        return blk;
}

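/* Records a CFG edge: the successor goes into the first free slot of the
 * block's successor array and the block is added to the successor's
 * predecessor set. Adding the same edge twice is a no-op; running out of
 * slots is fatal. */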
static void
bi_block_add_successor(bi_block *block, bi_block *successor)
{
        assert(block);
        assert(successor);

        for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
                if (block->successors[i]) {
                        if (block->successors[i] == successor)
                                return;
                        else
                                continue;
                }

                block->successors[i] = successor;
                _mesa_set_add(successor->predecessors, block);
                return;
        }

        unreachable("Too many successors");
}

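/* Ends the current block and continues in a fresh block that falls through
 * from it, so that instructions like ATEST/BLEND above finish a block. The
 * saved after_block pointer is preserved across the split. */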
static void
bi_schedule_barrier(bi_context *ctx)
{
        bi_block *temp = ctx->after_block;
        ctx->after_block = create_empty_block(ctx);
        list_addtail(&ctx->after_block->link, &ctx->blocks);
        list_inithead(&ctx->after_block->instructions);
        bi_block_add_successor(ctx->current_block, ctx->after_block);
        ctx->current_block = ctx->after_block;
        ctx->after_block = temp;
}

static bi_block *
emit_block(bi_context *ctx, nir_block *block)
{
        if (ctx->after_block) {
                ctx->current_block = ctx->after_block;
                ctx->after_block = NULL;
        } else {
                ctx->current_block = create_empty_block(ctx);
        }

        list_addtail(&ctx->current_block->link, &ctx->blocks);
        list_inithead(&ctx->current_block->instructions);

        nir_foreach_instr(instr, block) {
                emit_instr(ctx, instr);
                ++ctx->instruction_count;
        }

        return ctx->current_block;
}

/* Emits an unconditional branch to the end of the current block, returning a
 * pointer so the user can fill in details */

static bi_instruction *
bi_emit_branch(bi_context *ctx)
{
        bi_instruction branch = {
                .type = BI_BRANCH,
                .branch = {
                        .cond = BI_COND_ALWAYS
                }
        };

        return bi_emit(ctx, branch);
}

/* Sets a condition for a branch by examining the NIR condition. If we're
 * familiar with the condition, we unwrap it to fold it into the branch
 * instruction. Otherwise, we consume the condition directly. We
 * generally use 1-bit booleans, which allows us to use small types for
 * the conditions.
 */

static void
bi_set_branch_cond(bi_instruction *branch, nir_src *cond, bool invert)
{
        /* TODO: Try to unwrap instead of always bailing */
        branch->src[0] = bir_src_index(cond);
        branch->src[1] = BIR_INDEX_ZERO;
        branch->src_types[0] = branch->src_types[1] = nir_type_uint16;
        branch->branch.cond = invert ? BI_COND_EQ : BI_COND_NE;
}

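/* Lowers a NIR if: the inverted condition branches over the then side (to
 * the else block, or straight to the merge block when the else side is
 * empty), the then side exits with a jump to the merge block, and the
 * successor edges are wired up once both sides have been emitted. */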
static void
emit_if(bi_context *ctx, nir_if *nif)
{
        bi_block *before_block = ctx->current_block;

        /* Speculatively emit the branch, but we can't fill it in until later */
        bi_instruction *then_branch = bi_emit_branch(ctx);
        bi_set_branch_cond(then_branch, &nif->condition, true);

        /* Emit the two subblocks. */
        bi_block *then_block = emit_cf_list(ctx, &nif->then_list);
        bi_block *end_then_block = ctx->current_block;

        /* Emit a jump from the end of the then block to the end of the else */
        bi_instruction *then_exit = bi_emit_branch(ctx);

        /* Emit second block, and check if it's empty */

        int count_in = ctx->instruction_count;
        bi_block *else_block = emit_cf_list(ctx, &nif->else_list);
        bi_block *end_else_block = ctx->current_block;
        ctx->after_block = create_empty_block(ctx);

        /* Now that we have the subblocks emitted, fix up the branches */

        assert(then_block);
        assert(else_block);

        if (ctx->instruction_count == count_in) {
                /* The else block is empty, so don't emit an exit jump */
                bi_remove_instruction(then_exit);
                then_branch->branch.target = ctx->after_block;
        } else {
                then_branch->branch.target = else_block;
                then_exit->branch.target = ctx->after_block;
                bi_block_add_successor(end_then_block, then_exit->branch.target);
        }

        /* Wire up the successors */

        bi_block_add_successor(before_block, then_branch->branch.target); /* then_branch */

        bi_block_add_successor(before_block, then_block); /* fallthrough */
        bi_block_add_successor(end_else_block, ctx->after_block); /* fallthrough */
}

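/* Lowers a NIR loop: the continue block doubles as the top of the loop body,
 * the break block becomes the block following the loop, and the body ends
 * with an unconditional branch back to the continue block. The previous
 * break/continue targets are saved and restored around the body so nesting
 * works. */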
static void
emit_loop(bi_context *ctx, nir_loop *nloop)
{
        /* Remember where we are */
        bi_block *start_block = ctx->current_block;

        bi_block *saved_break = ctx->break_block;
        bi_block *saved_continue = ctx->continue_block;

        ctx->continue_block = create_empty_block(ctx);
        ctx->break_block = create_empty_block(ctx);
        ctx->after_block = ctx->continue_block;

        /* Emit the body itself */
        emit_cf_list(ctx, &nloop->body);

        /* Branch back to the top of the loop */
        bi_instruction *br_back = bi_emit_branch(ctx);
        br_back->branch.target = ctx->continue_block;
        bi_block_add_successor(start_block, ctx->continue_block);
        bi_block_add_successor(ctx->current_block, ctx->continue_block);

        ctx->after_block = ctx->break_block;

        /* Pop off */
        ctx->break_block = saved_break;
        ctx->continue_block = saved_continue;
        ++ctx->loop_count;
}

static bi_block *
emit_cf_list(bi_context *ctx, struct exec_list *list)
{
        bi_block *start_block = NULL;

        foreach_list_typed(nir_cf_node, node, node, list) {
                switch (node->type) {
                case nir_cf_node_block: {
                        bi_block *block = emit_block(ctx, nir_cf_node_as_block(node));

                        if (!start_block)
                                start_block = block;

                        break;
                }

                case nir_cf_node_if:
                        emit_if(ctx, nir_cf_node_as_if(node));
                        break;

                case nir_cf_node_loop:
                        emit_loop(ctx, nir_cf_node_as_loop(node));
                        break;

                default:
                        unreachable("Unknown control flow");
                }
        }

        return start_block;
}

static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}

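/* Standard NIR optimization loop: lower to scalar SSA, iterate copy
 * propagation / DCE / CSE / algebraic passes to a fixed point (lowering flrp
 * only on the first iteration), run the late passes, and finally convert out
 * of SSA. */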
static void
bi_optimize_nir(nir_shader *nir)
{
        bool progress;
        unsigned lower_flrp = 16 | 32 | 64;

        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);

        nir_lower_tex_options lower_tex_options = {
                .lower_txs_lod = true,
                .lower_txp = ~0,
                .lower_tex_without_implicit_lod = true,
                .lower_txd = true,
        };

        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
        NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
        NIR_PASS(progress, nir, nir_lower_load_const_to_scalar);

        do {
                progress = false;

                NIR_PASS(progress, nir, nir_lower_var_copies);
                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

                NIR_PASS(progress, nir, nir_copy_prop);
                NIR_PASS(progress, nir, nir_opt_remove_phis);
                NIR_PASS(progress, nir, nir_opt_dce);
                NIR_PASS(progress, nir, nir_opt_dead_cf);
                NIR_PASS(progress, nir, nir_opt_cse);
                NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
                NIR_PASS(progress, nir, nir_opt_algebraic);
                NIR_PASS(progress, nir, nir_opt_constant_folding);

                if (lower_flrp != 0) {
                        bool lower_flrp_progress = false;
                        NIR_PASS(lower_flrp_progress,
                                 nir,
                                 nir_lower_flrp,
                                 lower_flrp,
                                 false /* always_precise */,
                                 nir->options->lower_ffma);
                        if (lower_flrp_progress) {
                                NIR_PASS(progress, nir,
                                         nir_opt_constant_folding);
                                progress = true;
                        }

                        /* Nothing should rematerialize any flrps, so we only
                         * need to do this lowering once.
                         */
                        lower_flrp = 0;
                }

                NIR_PASS(progress, nir, nir_opt_undef);
                NIR_PASS(progress, nir, nir_opt_loop_unroll,
                         nir_var_shader_in |
                         nir_var_shader_out |
                         nir_var_function_temp);
        } while (progress);

        NIR_PASS(progress, nir, nir_opt_algebraic_late);
        NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
        NIR_PASS(progress, nir, nir_lower_load_const_to_scalar);

        /* Take us out of SSA */
        NIR_PASS(progress, nir, nir_lower_locals_to_regs);
        NIR_PASS(progress, nir, nir_convert_from_ssa, true);
}

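/* Compiler entry point: apply stage-specific and common NIR lowering,
 * optimize, translate the NIR control-flow list into BIR blocks, then run
 * the (currently dummy) scheduler. The NIR and BIR are printed to stdout as
 * bring-up debugging aids. */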
void
bifrost_compile_shader_nir(nir_shader *nir, bifrost_program *program, unsigned product_id)
{
        bi_context *ctx = rzalloc(NULL, bi_context);
        ctx->nir = nir;
        ctx->stage = nir->info.stage;
        ctx->quirks = bifrost_get_quirks(product_id);
        list_inithead(&ctx->blocks);

        /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
         * (so we don't accidentally duplicate the epilogue since mesa/st has
         * messed with our I/O quite a bit already) */

        NIR_PASS_V(nir, nir_lower_vars_to_ssa);

        if (ctx->stage == MESA_SHADER_VERTEX) {
                NIR_PASS_V(nir, nir_lower_viewport_transform);
                NIR_PASS_V(nir, nir_lower_point_size, 1.0, 1024.0);
        }

        NIR_PASS_V(nir, nir_split_var_copies);
        NIR_PASS_V(nir, nir_lower_global_vars_to_local);
        NIR_PASS_V(nir, nir_lower_var_copies);
        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
        NIR_PASS_V(nir, nir_lower_ssbo);

        bi_optimize_nir(nir);
        nir_print_shader(nir, stdout);

        nir_foreach_function(func, nir) {
                if (!func->impl)
                        continue;

                ctx->impl = func->impl;
                emit_cf_list(ctx, &func->impl->body);
                break; /* TODO: Multi-function shaders */
        }

        bi_print_shader(ctx, stdout);
        bi_schedule(ctx);
        bi_print_shader(ctx, stdout);

        ralloc_free(ctx);
}