pan/bi: Handle loops when ingesting CFG
[mesa.git] / src / panfrost / bifrost / bifrost_compile.c
1 /*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include "main/mtypes.h"
28 #include "compiler/glsl/glsl_to_nir.h"
29 #include "compiler/nir_types.h"
30 #include "main/imports.h"
31 #include "compiler/nir/nir_builder.h"
32
33 #include "disassemble.h"
34 #include "bifrost_compile.h"
35 #include "compiler.h"
36 #include "bi_quirks.h"
37 #include "bi_print.h"
38
39 static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list);
40
41 static bi_block *
42 create_empty_block(bi_context *ctx)
43 {
44 bi_block *blk = rzalloc(ctx, bi_block);
45
46 blk->predecessors = _mesa_set_create(blk,
47 _mesa_hash_pointer,
48 _mesa_key_pointer_equal);
49
50 blk->name = ctx->block_name_count++;
51
52 return blk;
53 }
54
55 static void
56 bi_block_add_successor(bi_block *block, bi_block *successor)
57 {
58 assert(block);
59 assert(successor);
60
61 for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
62 if (block->successors[i]) {
63 if (block->successors[i] == successor)
64 return;
65 else
66 continue;
67 }
68
69 block->successors[i] = successor;
70 _mesa_set_add(successor->predecessors, block);
71 return;
72 }
73
74 unreachable("Too many successors");
75 }
76
77 static bi_block *
78 emit_block(bi_context *ctx, nir_block *block)
79 {
80 if (ctx->after_block) {
81 ctx->current_block = ctx->after_block;
82 ctx->after_block = NULL;
83 } else {
84 ctx->current_block = create_empty_block(ctx);
85 }
86
87 list_addtail(&ctx->current_block->link, &ctx->blocks);
88 list_inithead(&ctx->current_block->instructions);
89
90 nir_foreach_instr(instr, block) {
91 //emit_instr(ctx, instr);
92 ++ctx->instruction_count;
93 }
94
95 return ctx->current_block;
96 }
97
98 /* Emits an unconditional branch to the end of the current block, returning a
99 * pointer so the user can fill in details */
100
101 static bi_instruction *
102 bi_emit_branch(bi_context *ctx)
103 {
104 bi_instruction branch = {
105 .type = BI_BRANCH,
106 .branch = {
107 .cond = BI_COND_ALWAYS
108 }
109 };
110
111 return bi_emit(ctx, branch);
112 }
113
/* Sets a condition for a branch by examing the NIR condition. If we're
 * familiar with the condition, we unwrap it to fold it into the branch
 * instruction. Otherwise, we consume the condition directly. We
 * generally use 1-bit booleans which allows us to use small types for
 * the conditions.
 */

static void
bi_set_branch_cond(bi_instruction *branch, nir_src *cond, bool invert)
{
        /* TODO: Try to unwrap instead of always bailing */
        /* Branch taken iff cond != 0 (or cond == 0 when inverted),
         * expressed as a compare-against-zero with 16-bit operands */
        branch->src[0] = bir_src_index(cond);
        branch->src[1] = BIR_INDEX_ZERO;
        branch->src_types[0] = branch->src_types[1] = nir_type_uint16;
        branch->branch.cond = invert ? BI_COND_EQ : BI_COND_NE;
}
130
/* Lowers a NIR if to blocks. Layout order is: before_block, then-blocks,
 * then_exit branch, else-blocks, after_block. The conditional branch is
 * inverted so that it *skips* the then side when the condition is false. */

static void
emit_if(bi_context *ctx, nir_if *nif)
{
        bi_block *before_block = ctx->current_block;

        /* Speculatively emit the branch, but we can't fill it in until later */
        bi_instruction *then_branch = bi_emit_branch(ctx);
        bi_set_branch_cond(then_branch, &nif->condition, true);

        /* Emit the two subblocks. */
        bi_block *then_block = emit_cf_list(ctx, &nif->then_list);
        bi_block *end_then_block = ctx->current_block;

        /* Emit a jump from the end of the then block to the end of the else */
        bi_instruction *then_exit = bi_emit_branch(ctx);

        /* Emit second block, and check if it's empty */

        /* Snapshot the count so we can detect a no-op else list below */
        int count_in = ctx->instruction_count;
        bi_block *else_block = emit_cf_list(ctx, &nif->else_list);
        bi_block *end_else_block = ctx->current_block;
        /* Pre-create the merge block; the next emit_block will adopt it */
        ctx->after_block = create_empty_block(ctx);

        /* Now that we have the subblocks emitted, fix up the branches */

        assert(then_block);
        assert(else_block);

        if (ctx->instruction_count == count_in) {
                /* The else block is empty, so don't emit an exit jump */
                bi_remove_instruction(then_exit);
                then_branch->branch.target = ctx->after_block;
                /* NOTE(review): in this path end_then_block falls through into
                 * the (empty) else_block in layout order, but no successor
                 * edge is added for that fallthrough — confirm intended */
        } else {
                then_branch->branch.target = else_block;
                then_exit->branch.target = ctx->after_block;
                bi_block_add_successor(end_then_block, then_exit->branch.target);
        }

        /* Wire up the successors */

        bi_block_add_successor(before_block, then_branch->branch.target); /* then_branch */

        bi_block_add_successor(before_block, then_block); /* fallthrough */
        bi_block_add_successor(end_else_block, ctx->after_block); /* fallthrough */
}
176
/* Lowers a NIR loop to blocks. NIR loops are infinite loops exited by
 * breaks; we emit a continue_block (loop header / back-edge target) and
 * a break_block (the block following the loop). break/continue targets
 * are stacked on the context to support nested loops. */

static void
emit_loop(bi_context *ctx, nir_loop *nloop)
{
        /* Remember where we are */
        bi_block *start_block = ctx->current_block;

        /* Push: save the enclosing loop's targets */
        bi_block *saved_break = ctx->break_block;
        bi_block *saved_continue = ctx->continue_block;

        ctx->continue_block = create_empty_block(ctx);
        ctx->break_block = create_empty_block(ctx);
        /* Make the body's first emit_block adopt continue_block, so the
         * loop header IS the first block of the body */
        ctx->after_block = ctx->continue_block;

        /* Emit the body itself */
        emit_cf_list(ctx, &nloop->body);

        /* Branch back to loop back */
        bi_instruction *br_back = bi_emit_branch(ctx);
        br_back->branch.target = ctx->continue_block;
        /* Edge from the preceding code into the header, and the back-edge */
        bi_block_add_successor(start_block, ctx->continue_block);
        bi_block_add_successor(ctx->current_block, ctx->continue_block);

        /* The block after the loop adopts break_block, so breaks land there.
         * NOTE(review): break/continue jump instructions inside the body are
         * not emitted yet (instruction selection is stubbed in emit_block),
         * so no edge into break_block exists at this point — confirm that
         * is added when breaks are handled */
        ctx->after_block = ctx->break_block;

        /* Pop off */
        ctx->break_block = saved_break;
        ctx->continue_block = saved_continue;
        ++ctx->loop_count;
}
206
207 static bi_block *
208 emit_cf_list(bi_context *ctx, struct exec_list *list)
209 {
210 bi_block *start_block = NULL;
211
212 foreach_list_typed(nir_cf_node, node, node, list) {
213 switch (node->type) {
214 case nir_cf_node_block: {
215 bi_block *block = emit_block(ctx, nir_cf_node_as_block(node));
216
217 if (!start_block)
218 start_block = block;
219
220 break;
221 }
222
223 case nir_cf_node_if:
224 emit_if(ctx, nir_cf_node_as_if(node));
225 break;
226
227 case nir_cf_node_loop:
228 emit_loop(ctx, nir_cf_node_as_loop(node));
229 break;
230
231 default:
232 unreachable("Unknown control flow");
233 }
234 }
235
236 return start_block;
237 }
238
/* Type-size callback for nir_lower_io: counts attribute slots (vec4s).
 * The bindless flag is intentionally ignored; the second argument to
 * glsl_count_attribute_slots (is_vertex_input) is hardcoded false. */
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}
244
/* Runs the NIR optimization loop: one-time lowerings first, then the
 * standard opt passes to a fixed point, then late lowering out of SSA.
 * Pass order matters; do not reorder without care. */

static void
bi_optimize_nir(nir_shader *nir)
{
        /* NOTE(review): progress is not initialized before the first
         * NIR_PASS invocations; fine iff NIR_PASS only ever writes it —
         * confirm, or initialize to false for safety */
        bool progress;
        /* Bitmask of flrp bit sizes still needing lowering (16/32/64) */
        unsigned lower_flrp = 16 | 32 | 64;

        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);

        nir_lower_tex_options lower_tex_options = {
                .lower_txs_lod = true,
                .lower_txp = ~0,
                .lower_tex_without_implicit_lod = true,
                .lower_txd = true,
        };

        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);

        /* Iterate the core opt passes until nothing makes progress */
        do {
                progress = false;

                NIR_PASS(progress, nir, nir_lower_var_copies);
                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

                NIR_PASS(progress, nir, nir_copy_prop);
                NIR_PASS(progress, nir, nir_opt_remove_phis);
                NIR_PASS(progress, nir, nir_opt_dce);
                NIR_PASS(progress, nir, nir_opt_dead_cf);
                NIR_PASS(progress, nir, nir_opt_cse);
                NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
                NIR_PASS(progress, nir, nir_opt_algebraic);
                NIR_PASS(progress, nir, nir_opt_constant_folding);

                if (lower_flrp != 0) {
                        bool lower_flrp_progress = false;
                        NIR_PASS(lower_flrp_progress,
                                 nir,
                                 nir_lower_flrp,
                                 lower_flrp,
                                 false /* always_precise */,
                                 nir->options->lower_ffma);
                        if (lower_flrp_progress) {
                                NIR_PASS(progress, nir,
                                         nir_opt_constant_folding);
                                progress = true;
                        }

                        /* Nothing should rematerialize any flrps, so we only
                         * need to do this lowering once.
                         */
                        lower_flrp = 0;
                }

                NIR_PASS(progress, nir, nir_opt_undef);
                NIR_PASS(progress, nir, nir_opt_loop_unroll,
                         nir_var_shader_in |
                         nir_var_shader_out |
                         nir_var_function_temp);
        } while (progress);

        NIR_PASS(progress, nir, nir_opt_algebraic_late);

        /* Take us out of SSA */
        NIR_PASS(progress, nir, nir_lower_locals_to_regs);
        NIR_PASS(progress, nir, nir_convert_from_ssa, true);
}
311
/* Entry point: compiles a NIR shader for Bifrost. Currently only lowers
 * the NIR, ingests the CFG into bi_blocks, and prints the result; actual
 * instruction selection and packing are not implemented yet. product_id
 * selects hardware quirks. */
void
bifrost_compile_shader_nir(nir_shader *nir, bifrost_program *program, unsigned product_id)
{
        bi_context *ctx = rzalloc(NULL, bi_context);
        ctx->nir = nir;
        ctx->stage = nir->info.stage;
        ctx->quirks = bifrost_get_quirks(product_id);
        list_inithead(&ctx->blocks);

        /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
         * (so we don't accidentally duplicate the epilogue since mesa/st has
         * messed with our I/O quite a bit already) */

        NIR_PASS_V(nir, nir_lower_vars_to_ssa);

        if (ctx->stage == MESA_SHADER_VERTEX) {
                NIR_PASS_V(nir, nir_lower_viewport_transform);
                /* Clamp point size to the hardware-supported range */
                NIR_PASS_V(nir, nir_lower_point_size, 1.0, 1024.0);
        }

        NIR_PASS_V(nir, nir_split_var_copies);
        NIR_PASS_V(nir, nir_lower_global_vars_to_local);
        NIR_PASS_V(nir, nir_lower_var_copies);
        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
        NIR_PASS_V(nir, nir_lower_ssbo);

        /* We have to lower ALU to scalar ourselves since viewport
         * transformations produce vector ops */
        NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);

        bi_optimize_nir(nir);
        /* NOTE(review): unconditional debug dump to stdout — presumably
         * temporary while the backend is under development */
        nir_print_shader(nir, stdout);

        nir_foreach_function(func, nir) {
                if (!func->impl)
                        continue;

                emit_cf_list(ctx, &func->impl->body);
                break; /* TODO: Multi-function shaders */
        }

        /* NOTE(review): also an unconditional debug dump */
        bi_print_shader(ctx, stdout);

        ralloc_free(ctx);
}