/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "main/mtypes.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/nir_types.h"
#include "main/imports.h"
#include "compiler/nir/nir_builder.h"

#include "disassemble.h"
#include "bifrost_compile.h"
#include "compiler.h"
#include "bi_quirks.h"
#include "bi_print.h"

static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list);
static bi_instruction *bi_emit_branch(bi_context *ctx);
static void bi_block_add_successor(bi_block *block, bi_block *successor);

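/* Emits a NIR break/continue as an unconditional branch to the enclosing
 * loop's break or continue block, recording the resulting CFG edge */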
static void
emit_jump(bi_context *ctx, nir_jump_instr *instr)
{
        bi_instruction *branch = bi_emit_branch(ctx);

        switch (instr->type) {
        case nir_jump_break:
                branch->branch.target = ctx->break_block;
                break;
        case nir_jump_continue:
                branch->branch.target = ctx->continue_block;
                break;
        default:
                unreachable("Unhandled jump type");
        }

        bi_block_add_successor(ctx->current_block, branch->branch.target);
}

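/* Emits a single NIR instruction. ALU, intrinsic, texture, and load_const
 * handling is still stubbed out (#if 0'd) during bring-up, so only jumps
 * are translated for now; ssa_undef is a deliberate no-op */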
static void
emit_instr(bi_context *ctx, struct nir_instr *instr)
{
        switch (instr->type) {
#if 0
        case nir_instr_type_load_const:
                emit_load_const(ctx, nir_instr_as_load_const(instr));
                break;

        case nir_instr_type_intrinsic:
                emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
                break;

        case nir_instr_type_alu:
                emit_alu(ctx, nir_instr_as_alu(instr));
                break;

        case nir_instr_type_tex:
                emit_tex(ctx, nir_instr_as_tex(instr));
                break;
#endif

        case nir_instr_type_jump:
                emit_jump(ctx, nir_instr_as_jump(instr));
                break;

        case nir_instr_type_ssa_undef:
                /* Spurious */
                break;

        default:
                //unreachable("Unhandled instruction type");
                break;
        }
}

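/* Allocates a fresh block with an empty predecessor set and the next
 * sequential name, used when printing the IR */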
static bi_block *
create_empty_block(bi_context *ctx)
{
        bi_block *blk = rzalloc(ctx, bi_block);

        blk->predecessors = _mesa_set_create(blk,
                        _mesa_hash_pointer,
                        _mesa_key_pointer_equal);

        blk->name = ctx->block_name_count++;

        return blk;
}

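/* Records the CFG edge block -> successor on both sides: the successor is
 * added to the block's fixed-size successor array (skipping duplicates),
 * and the block is added to the successor's predecessor set */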
static void
bi_block_add_successor(bi_block *block, bi_block *successor)
{
        assert(block);
        assert(successor);

        for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
                if (block->successors[i]) {
                        if (block->successors[i] == successor)
                                return;
                        else
                                continue;
                }

                block->successors[i] = successor;
                _mesa_set_add(successor->predecessors, block);
                return;
        }

        unreachable("Too many successors");
}

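/* Emits the instructions of a NIR block into a fresh bi_block, or into
 * ctx->after_block if a preceding control-flow construct reserved one */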
static bi_block *
emit_block(bi_context *ctx, nir_block *block)
{
        if (ctx->after_block) {
                ctx->current_block = ctx->after_block;
                ctx->after_block = NULL;
        } else {
                ctx->current_block = create_empty_block(ctx);
        }

        list_addtail(&ctx->current_block->link, &ctx->blocks);
        list_inithead(&ctx->current_block->instructions);

        nir_foreach_instr(instr, block) {
                emit_instr(ctx, instr);
                ++ctx->instruction_count;
        }

        return ctx->current_block;
}

/* Emits an unconditional branch at the end of the current block, returning a
 * pointer so the caller can fill in the target and condition */

static bi_instruction *
bi_emit_branch(bi_context *ctx)
{
        bi_instruction branch = {
                .type = BI_BRANCH,
                .branch = {
                        .cond = BI_COND_ALWAYS
                }
        };

        return bi_emit(ctx, branch);
}

/* Sets a condition for a branch by examining the NIR condition. If we're
 * familiar with the condition, we unwrap it to fold it into the branch
 * instruction. Otherwise, we consume the condition directly. We
 * generally use 1-bit booleans which allows us to use small types for
 * the conditions.
 */

static void
bi_set_branch_cond(bi_instruction *branch, nir_src *cond, bool invert)
{
        /* TODO: Try to unwrap instead of always bailing */
        branch->src[0] = bir_src_index(cond);
        branch->src[1] = BIR_INDEX_ZERO;
        branch->src_types[0] = branch->src_types[1] = nir_type_uint16;
        branch->branch.cond = invert ? BI_COND_EQ : BI_COND_NE;
}

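/* Emits an if-else as a diamond in the CFG. The conditional branch out of
 * before_block is inverted, so it is taken when the condition is *false*
 * and skips over the then side:
 *
 *            before_block
 *            /          \
 *    (fallthrough)   (cond false)
 *          v               v
 *     then_block      else_block
 *          \               /
 *      (then_exit)  (fallthrough)
 *           \             /
 *            after_block
 *
 * If the else side emits no instructions, the diamond degenerates:
 * then_exit is removed and the conditional branch targets after_block
 * directly */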
static void
emit_if(bi_context *ctx, nir_if *nif)
{
        bi_block *before_block = ctx->current_block;

        /* Speculatively emit the branch, but we can't fill it in until later */
        bi_instruction *then_branch = bi_emit_branch(ctx);
        bi_set_branch_cond(then_branch, &nif->condition, true);

        /* Emit the two subblocks. */
        bi_block *then_block = emit_cf_list(ctx, &nif->then_list);
        bi_block *end_then_block = ctx->current_block;

        /* Emit a jump from the end of the then block over the else block */
        bi_instruction *then_exit = bi_emit_branch(ctx);

        /* Emit the second block, and check if it's empty */

        int count_in = ctx->instruction_count;
        bi_block *else_block = emit_cf_list(ctx, &nif->else_list);
        bi_block *end_else_block = ctx->current_block;
        ctx->after_block = create_empty_block(ctx);

        /* Now that we have the subblocks emitted, fix up the branches */

        assert(then_block);
        assert(else_block);

        if (ctx->instruction_count == count_in) {
                /* The else block is empty, so don't emit an exit jump */
                bi_remove_instruction(then_exit);
                then_branch->branch.target = ctx->after_block;
        } else {
                then_branch->branch.target = else_block;
                then_exit->branch.target = ctx->after_block;
                bi_block_add_successor(end_then_block, then_exit->branch.target);
        }

        /* Wire up the successors */

        bi_block_add_successor(before_block, then_branch->branch.target); /* then_branch */

        bi_block_add_successor(before_block, then_block); /* fallthrough */
        bi_block_add_successor(end_else_block, ctx->after_block); /* fallthrough */
}

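/* Emits a NIR loop. A fresh continue_block acts as the loop header (the
 * body's first block is emitted into it via after_block) and a fresh
 * break_block as the loop exit; an unconditional back edge to the header
 * is emitted after the body. The saved break/continue blocks are
 * restored afterwards so nested loops work */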
static void
emit_loop(bi_context *ctx, nir_loop *nloop)
{
        /* Remember where we are */
        bi_block *start_block = ctx->current_block;

        bi_block *saved_break = ctx->break_block;
        bi_block *saved_continue = ctx->continue_block;

        ctx->continue_block = create_empty_block(ctx);
        ctx->break_block = create_empty_block(ctx);
        ctx->after_block = ctx->continue_block;

        /* Emit the body itself */
        emit_cf_list(ctx, &nloop->body);

        /* Branch back to the top of the loop */
        bi_instruction *br_back = bi_emit_branch(ctx);
        br_back->branch.target = ctx->continue_block;
        bi_block_add_successor(start_block, ctx->continue_block);
        bi_block_add_successor(ctx->current_block, ctx->continue_block);

        ctx->after_block = ctx->break_block;

        /* Pop off */
        ctx->break_block = saved_break;
        ctx->continue_block = saved_continue;
        ++ctx->loop_count;
}

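/* Walks a NIR control flow list, emitting blocks, ifs, and loops in
 * order. Returns the first block emitted, so callers can branch to the
 * start of the list */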
static bi_block *
emit_cf_list(bi_context *ctx, struct exec_list *list)
{
        bi_block *start_block = NULL;

        foreach_list_typed(nir_cf_node, node, node, list) {
                switch (node->type) {
                case nir_cf_node_block: {
                        bi_block *block = emit_block(ctx, nir_cf_node_as_block(node));

                        if (!start_block)
                                start_block = block;

                        break;
                }

                case nir_cf_node_if:
                        emit_if(ctx, nir_cf_node_as_if(node));
                        break;

                case nir_cf_node_loop:
                        emit_loop(ctx, nir_cf_node_as_loop(node));
                        break;

                default:
                        unreachable("Unknown control flow");
                }
        }

        return start_block;
}

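/* nir_lower_io callback: I/O variables are sized in vec4 attribute slots
 * (the bindless flag is unused here) */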
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}

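/* Runs the NIR optimization loop: a few one-shot lowerings up front, then
 * the usual passes to a fixed point (flrp lowering only on the first
 * iteration, since nothing should rematerialize flrps), late algebraic
 * cleanup, and finally conversion out of SSA */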
static void
bi_optimize_nir(nir_shader *nir)
{
        bool progress;
        unsigned lower_flrp = 16 | 32 | 64;

        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);

        nir_lower_tex_options lower_tex_options = {
                .lower_txs_lod = true,
                .lower_txp = ~0,
                .lower_tex_without_implicit_lod = true,
                .lower_txd = true,
        };

        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);

        do {
                progress = false;

                NIR_PASS(progress, nir, nir_lower_var_copies);
                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

                NIR_PASS(progress, nir, nir_copy_prop);
                NIR_PASS(progress, nir, nir_opt_remove_phis);
                NIR_PASS(progress, nir, nir_opt_dce);
                NIR_PASS(progress, nir, nir_opt_dead_cf);
                NIR_PASS(progress, nir, nir_opt_cse);
                NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
                NIR_PASS(progress, nir, nir_opt_algebraic);
                NIR_PASS(progress, nir, nir_opt_constant_folding);

                if (lower_flrp != 0) {
                        bool lower_flrp_progress = false;
                        NIR_PASS(lower_flrp_progress,
                                 nir,
                                 nir_lower_flrp,
                                 lower_flrp,
                                 false /* always_precise */,
                                 nir->options->lower_ffma);
                        if (lower_flrp_progress) {
                                NIR_PASS(progress, nir,
                                         nir_opt_constant_folding);
                                progress = true;
                        }

                        /* Nothing should rematerialize any flrps, so we only
                         * need to do this lowering once.
                         */
                        lower_flrp = 0;
                }

                NIR_PASS(progress, nir, nir_opt_undef);
                NIR_PASS(progress, nir, nir_opt_loop_unroll,
                         nir_var_shader_in |
                         nir_var_shader_out |
                         nir_var_function_temp);
        } while (progress);

        NIR_PASS(progress, nir, nir_opt_algebraic_late);

        /* Take us out of SSA */
        NIR_PASS(progress, nir, nir_lower_locals_to_regs);
        NIR_PASS(progress, nir, nir_convert_from_ssa, true);
}

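/* Compiler entry point: lowers and optimizes the NIR, then emits Bifrost
 * IR for the first function's control flow. Code generation past the IR
 * is not wired up yet, so the NIR and Bifrost IR are dumped to stdout for
 * bring-up instead of populating the program binary */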
void
bifrost_compile_shader_nir(nir_shader *nir, bifrost_program *program, unsigned product_id)
{
        bi_context *ctx = rzalloc(NULL, bi_context);
        ctx->nir = nir;
        ctx->stage = nir->info.stage;
        ctx->quirks = bifrost_get_quirks(product_id);
        list_inithead(&ctx->blocks);

        /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
         * (so we don't accidentally duplicate the epilogue since mesa/st has
         * messed with our I/O quite a bit already) */

        NIR_PASS_V(nir, nir_lower_vars_to_ssa);

        if (ctx->stage == MESA_SHADER_VERTEX) {
                NIR_PASS_V(nir, nir_lower_viewport_transform);
                NIR_PASS_V(nir, nir_lower_point_size, 1.0, 1024.0);
        }

        NIR_PASS_V(nir, nir_split_var_copies);
        NIR_PASS_V(nir, nir_lower_global_vars_to_local);
        NIR_PASS_V(nir, nir_lower_var_copies);
        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
        NIR_PASS_V(nir, nir_lower_ssbo);

        /* We have to lower ALU to scalar ourselves since viewport
         * transformations produce vector ops */
        NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);

        bi_optimize_nir(nir);
        nir_print_shader(nir, stdout);

        nir_foreach_function(func, nir) {
                if (!func->impl)
                        continue;

                emit_cf_list(ctx, &func->impl->body);
                break; /* TODO: Multi-function shaders */
        }

        bi_print_shader(ctx, stdout);

        ralloc_free(ctx);
}