pan/bi: Add bi_schedule_barrier helper
[mesa.git] / src / panfrost / bifrost / bifrost_compile.c
/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "main/mtypes.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/nir_types.h"
#include "main/imports.h"
#include "compiler/nir/nir_builder.h"

#include "disassemble.h"
#include "bifrost_compile.h"
#include "compiler.h"
#include "bi_quirks.h"
#include "bi_print.h"

static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list);
static bi_instruction *bi_emit_branch(bi_context *ctx);
static void bi_block_add_successor(bi_block *block, bi_block *successor);
static void bi_schedule_barrier(bi_context *ctx);

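/* Emits a branch for a NIR break/continue jump, pointed at the break or
 * continue block recorded by the enclosing loop, and records the CFG edge */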
static void
emit_jump(bi_context *ctx, nir_jump_instr *instr)
{
        bi_instruction *branch = bi_emit_branch(ctx);

        switch (instr->type) {
        case nir_jump_break:
                branch->branch.target = ctx->break_block;
                break;
        case nir_jump_continue:
                branch->branch.target = ctx->continue_block;
                break;
        default:
                unreachable("Unhandled jump type");
        }

        bi_block_add_successor(ctx->current_block, branch->branch.target);
}

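/* Emits a varying load (LD_VAR). A constant I/O offset is folded into the
 * base location; an indirect offset is passed along as a source instead */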
static void
bi_emit_ld_vary(bi_context *ctx, nir_intrinsic_instr *instr)
{
        bi_instruction ins = {
                .type = BI_LOAD_VAR,
                .load_vary = {
                        .load = {
                                .location = nir_intrinsic_base(instr),
                                .channels = instr->num_components,
                        },
                        .interp_mode = BIFROST_INTERP_DEFAULT, /* TODO */
                        .reuse = false, /* TODO */
                        .flat = instr->intrinsic != nir_intrinsic_load_interpolated_input
                },
                .dest = bir_dest_index(&instr->dest),
                .dest_type = nir_type_float | nir_dest_bit_size(instr->dest),
        };

        nir_src *offset = nir_get_io_offset_src(instr);

        if (nir_src_is_const(*offset))
                ins.load_vary.load.location += nir_src_as_uint(*offset);
        else
                ins.src[0] = bir_src_index(offset);

        bi_emit(ctx, ins);
}

static void
emit_intrinsic(bi_context *ctx, nir_intrinsic_instr *instr)
{
        switch (instr->intrinsic) {
        case nir_intrinsic_load_barycentric_pixel:
                /* stub */
                break;
        case nir_intrinsic_load_interpolated_input:
                bi_emit_ld_vary(ctx, instr);
                break;
        default:
                /* todo */
                break;
        }
}

static void
emit_instr(bi_context *ctx, struct nir_instr *instr)
{
        switch (instr->type) {
#if 0
        case nir_instr_type_load_const:
                emit_load_const(ctx, nir_instr_as_load_const(instr));
                break;
#endif

        case nir_instr_type_intrinsic:
                emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
                break;

#if 0
        case nir_instr_type_alu:
                emit_alu(ctx, nir_instr_as_alu(instr));
                break;

        case nir_instr_type_tex:
                emit_tex(ctx, nir_instr_as_tex(instr));
                break;
#endif

        case nir_instr_type_jump:
                emit_jump(ctx, nir_instr_as_jump(instr));
                break;

        case nir_instr_type_ssa_undef:
                /* Spurious */
                break;

        default:
                //unreachable("Unhandled instruction type");
                break;
        }
}

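/* Allocates a fresh block with an empty predecessor set and the next
 * sequential block name */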
static bi_block *
create_empty_block(bi_context *ctx)
{
        bi_block *blk = rzalloc(ctx, bi_block);

        blk->predecessors = _mesa_set_create(blk,
                        _mesa_hash_pointer,
                        _mesa_key_pointer_equal);

        blk->name = ctx->block_name_count++;

        return blk;
}

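/* Records a CFG edge: the successor fills the first free successor slot
 * (unless it is already present) and the block is added to the successor's
 * predecessor set */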
static void
bi_block_add_successor(bi_block *block, bi_block *successor)
{
        assert(block);
        assert(successor);

        for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
                if (block->successors[i]) {
                        if (block->successors[i] == successor)
                                return;
                        else
                                continue;
                }

                block->successors[i] = successor;
                _mesa_set_add(successor->predecessors, block);
                return;
        }

        unreachable("Too many successors");
}

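/* Splits control flow at the current point by starting a fresh block, linked
 * as a successor of the current one, and making it the current block. Any
 * after_block pending from surrounding control flow is stashed and restored,
 * so it is still picked up by the next emit_block */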
static void
bi_schedule_barrier(bi_context *ctx)
{
        bi_block *temp = ctx->after_block;
        ctx->after_block = create_empty_block(ctx);
        list_addtail(&ctx->after_block->link, &ctx->blocks);
        list_inithead(&ctx->after_block->instructions);
        bi_block_add_successor(ctx->current_block, ctx->after_block);
        ctx->current_block = ctx->after_block;
        ctx->after_block = temp;
}

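/* Emits a NIR block, reusing a pending after_block if one was set up by
 * enclosing control flow, and returns the resulting bi_block */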
static bi_block *
emit_block(bi_context *ctx, nir_block *block)
{
        if (ctx->after_block) {
                ctx->current_block = ctx->after_block;
                ctx->after_block = NULL;
        } else {
                ctx->current_block = create_empty_block(ctx);
        }

        list_addtail(&ctx->current_block->link, &ctx->blocks);
        list_inithead(&ctx->current_block->instructions);

        nir_foreach_instr(instr, block) {
                emit_instr(ctx, instr);
                ++ctx->instruction_count;
        }

        return ctx->current_block;
}

/* Emits an unconditional branch at the end of the current block, returning a
 * pointer so the caller can fill in the target and condition */

static bi_instruction *
bi_emit_branch(bi_context *ctx)
{
        bi_instruction branch = {
                .type = BI_BRANCH,
                .branch = {
                        .cond = BI_COND_ALWAYS
                }
        };

        return bi_emit(ctx, branch);
}

/* Sets a condition for a branch by examining the NIR condition. If we're
 * familiar with the condition, we unwrap it to fold it into the branch
 * instruction. Otherwise, we consume the condition directly. We generally
 * use 1-bit booleans, which allows us to use small types for the condition.
 */

static void
bi_set_branch_cond(bi_instruction *branch, nir_src *cond, bool invert)
{
        /* TODO: Try to unwrap instead of always bailing */
        branch->src[0] = bir_src_index(cond);
        branch->src[1] = BIR_INDEX_ZERO;
        branch->src_types[0] = branch->src_types[1] = nir_type_uint16;
        branch->branch.cond = invert ? BI_COND_EQ : BI_COND_NE;
}

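/* Emits an if/else as a diamond in the CFG: branch over the then side when
 * the condition fails, fall through otherwise, and merge both sides in a
 * fresh after_block. An empty else side collapses to a single branch
 * straight to the merge block */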
static void
emit_if(bi_context *ctx, nir_if *nif)
{
        bi_block *before_block = ctx->current_block;

        /* Speculatively emit the branch, but we can't fill it in until later */
        bi_instruction *then_branch = bi_emit_branch(ctx);
        bi_set_branch_cond(then_branch, &nif->condition, true);

        /* Emit the two subblocks. */
        bi_block *then_block = emit_cf_list(ctx, &nif->then_list);
        bi_block *end_then_block = ctx->current_block;

        /* Emit a jump from the end of the then block to the end of the else */
        bi_instruction *then_exit = bi_emit_branch(ctx);

        /* Emit second block, and check if it's empty */

        int count_in = ctx->instruction_count;
        bi_block *else_block = emit_cf_list(ctx, &nif->else_list);
        bi_block *end_else_block = ctx->current_block;
        ctx->after_block = create_empty_block(ctx);

        /* Now that we have the subblocks emitted, fix up the branches */

        assert(then_block);
        assert(else_block);

        if (ctx->instruction_count == count_in) {
                /* The else block is empty, so don't emit an exit jump */
                bi_remove_instruction(then_exit);
                then_branch->branch.target = ctx->after_block;
        } else {
                then_branch->branch.target = else_block;
                then_exit->branch.target = ctx->after_block;
                bi_block_add_successor(end_then_block, then_exit->branch.target);
        }

        /* Wire up the successors */

        bi_block_add_successor(before_block, then_branch->branch.target); /* then_branch */

        bi_block_add_successor(before_block, then_block); /* fallthrough */
        bi_block_add_successor(end_else_block, ctx->after_block); /* fallthrough */
}

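/* Emits a loop: the body starts in the continue block (so continues jump
 * back to the top of the loop), a back-edge branch is emitted after the
 * body, and breaks target the block that follows the loop */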
static void
emit_loop(bi_context *ctx, nir_loop *nloop)
{
        /* Remember where we are */
        bi_block *start_block = ctx->current_block;

        bi_block *saved_break = ctx->break_block;
        bi_block *saved_continue = ctx->continue_block;

        ctx->continue_block = create_empty_block(ctx);
        ctx->break_block = create_empty_block(ctx);
        ctx->after_block = ctx->continue_block;

        /* Emit the body itself */
        emit_cf_list(ctx, &nloop->body);

        /* Branch back to the top of the loop */
        bi_instruction *br_back = bi_emit_branch(ctx);
        br_back->branch.target = ctx->continue_block;
        bi_block_add_successor(start_block, ctx->continue_block);
        bi_block_add_successor(ctx->current_block, ctx->continue_block);

        ctx->after_block = ctx->break_block;

        /* Pop off */
        ctx->break_block = saved_break;
        ctx->continue_block = saved_continue;
        ++ctx->loop_count;
}

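/* Emits a NIR control flow list (blocks, ifs, loops), returning the first
 * block emitted */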
static bi_block *
emit_cf_list(bi_context *ctx, struct exec_list *list)
{
        bi_block *start_block = NULL;

        foreach_list_typed(nir_cf_node, node, node, list) {
                switch (node->type) {
                case nir_cf_node_block: {
                        bi_block *block = emit_block(ctx, nir_cf_node_as_block(node));

                        if (!start_block)
                                start_block = block;

                        break;
                }

                case nir_cf_node_if:
                        emit_if(ctx, nir_cf_node_as_if(node));
                        break;

                case nir_cf_node_loop:
                        emit_loop(ctx, nir_cf_node_as_loop(node));
                        break;

                default:
                        unreachable("Unknown control flow");
                }
        }

        return start_block;
}

static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}

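/* Runs the NIR optimization loop used before code generation, then lowers
 * back out of SSA */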
static void
bi_optimize_nir(nir_shader *nir)
{
        bool progress;
        unsigned lower_flrp = 16 | 32 | 64;

        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);

        nir_lower_tex_options lower_tex_options = {
                .lower_txs_lod = true,
                .lower_txp = ~0,
                .lower_tex_without_implicit_lod = true,
                .lower_txd = true,
        };

        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);

        do {
                progress = false;

                NIR_PASS(progress, nir, nir_lower_var_copies);
                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

                NIR_PASS(progress, nir, nir_copy_prop);
                NIR_PASS(progress, nir, nir_opt_remove_phis);
                NIR_PASS(progress, nir, nir_opt_dce);
                NIR_PASS(progress, nir, nir_opt_dead_cf);
                NIR_PASS(progress, nir, nir_opt_cse);
                NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
                NIR_PASS(progress, nir, nir_opt_algebraic);
                NIR_PASS(progress, nir, nir_opt_constant_folding);

                if (lower_flrp != 0) {
                        bool lower_flrp_progress = false;
                        NIR_PASS(lower_flrp_progress,
                                 nir,
                                 nir_lower_flrp,
                                 lower_flrp,
                                 false /* always_precise */,
                                 nir->options->lower_ffma);
                        if (lower_flrp_progress) {
                                NIR_PASS(progress, nir,
                                         nir_opt_constant_folding);
                                progress = true;
                        }

                        /* Nothing should rematerialize any flrps, so we only
                         * need to do this lowering once.
                         */
                        lower_flrp = 0;
                }

                NIR_PASS(progress, nir, nir_opt_undef);
                NIR_PASS(progress, nir, nir_opt_loop_unroll,
                         nir_var_shader_in |
                         nir_var_shader_out |
                         nir_var_function_temp);
        } while (progress);

        NIR_PASS(progress, nir, nir_opt_algebraic_late);

        /* Take us out of SSA */
        NIR_PASS(progress, nir, nir_lower_locals_to_regs);
        NIR_PASS(progress, nir, nir_convert_from_ssa, true);
}

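/* Compiler entry point: lowers and optimizes the NIR, then emits BIR control
 * flow for each function. The NIR and the emitted BIR are printed for
 * debugging; packing to a binary is not wired up yet */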
void
bifrost_compile_shader_nir(nir_shader *nir, bifrost_program *program, unsigned product_id)
{
        bi_context *ctx = rzalloc(NULL, bi_context);
        ctx->nir = nir;
        ctx->stage = nir->info.stage;
        ctx->quirks = bifrost_get_quirks(product_id);
        list_inithead(&ctx->blocks);

        /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
         * (so we don't accidentally duplicate the epilogue since mesa/st has
         * messed with our I/O quite a bit already) */

        NIR_PASS_V(nir, nir_lower_vars_to_ssa);

        if (ctx->stage == MESA_SHADER_VERTEX) {
                NIR_PASS_V(nir, nir_lower_viewport_transform);
                NIR_PASS_V(nir, nir_lower_point_size, 1.0, 1024.0);
        }

        NIR_PASS_V(nir, nir_split_var_copies);
        NIR_PASS_V(nir, nir_lower_global_vars_to_local);
        NIR_PASS_V(nir, nir_lower_var_copies);
        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
        NIR_PASS_V(nir, nir_lower_ssbo);

        /* We have to lower ALU to scalar ourselves since viewport
         * transformations produce vector ops */
        NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);

        bi_optimize_nir(nir);
        nir_print_shader(nir, stdout);

        nir_foreach_function(func, nir) {
                if (!func->impl)
                        continue;

                emit_cf_list(ctx, &func->impl->body);
                break; /* TODO: Multi-function shaders */
        }

        bi_print_shader(ctx, stdout);

        ralloc_free(ctx);
}