pan/bi: Implement nir_intrinsic_load_interpolated_input
[mesa.git] / src / panfrost / bifrost / bifrost_compile.c
/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "main/mtypes.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/nir_types.h"
#include "main/imports.h"
#include "compiler/nir/nir_builder.h"

#include "disassemble.h"
#include "bifrost_compile.h"
#include "compiler.h"
#include "bi_quirks.h"
#include "bi_print.h"

static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list);
static bi_instruction *bi_emit_branch(bi_context *ctx);
static void bi_block_add_successor(bi_block *block, bi_block *successor);

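/* Lowers a NIR jump (break/continue) to an unconditional BI_BRANCH aimed at
 * the break or continue block recorded on the context, and records the CFG
 * edge so successor/predecessor sets stay consistent. */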
static void
emit_jump(bi_context *ctx, nir_jump_instr *instr)
{
        bi_instruction *branch = bi_emit_branch(ctx);

        switch (instr->type) {
        case nir_jump_break:
                branch->branch.target = ctx->break_block;
                break;
        case nir_jump_continue:
                branch->branch.target = ctx->continue_block;
                break;
        default:
                unreachable("Unhandled jump type");
        }

        bi_block_add_successor(ctx->current_block, branch->branch.target);
}

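/* Emits a BI_LOAD_VAR for a varying load, used both for
 * nir_intrinsic_load_interpolated_input and for flat inputs. Constant I/O
 * offsets are folded into the location; an indirect offset becomes the first
 * source. Interpolation mode and register reuse are still TODO. */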
static void
bi_emit_ld_vary(bi_context *ctx, nir_intrinsic_instr *instr)
{
        bi_instruction ins = {
                .type = BI_LOAD_VAR,
                .load_vary = {
                        .load = {
                                .location = nir_intrinsic_base(instr),
                                .channels = instr->num_components,
                        },
                        .interp_mode = BIFROST_INTERP_DEFAULT, /* TODO */
                        .reuse = false, /* TODO */
                        .flat = instr->intrinsic != nir_intrinsic_load_interpolated_input
                },
                .dest = bir_dest_index(&instr->dest),
                .dest_type = nir_type_float | nir_dest_bit_size(instr->dest),
        };

        nir_src *offset = nir_get_io_offset_src(instr);

        if (nir_src_is_const(*offset))
                ins.load_vary.load.location += nir_src_as_uint(*offset);
        else
                ins.src[0] = bir_src_index(offset);

        bi_emit(ctx, ins);
}

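/* Intrinsic dispatch. load_barycentric_pixel is a stub for now, and
 * unhandled intrinsics are silently skipped while the backend is brought
 * up. */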
static void
emit_intrinsic(bi_context *ctx, nir_intrinsic_instr *instr)
{
        switch (instr->intrinsic) {
        case nir_intrinsic_load_barycentric_pixel:
                /* stub */
                break;
        case nir_intrinsic_load_interpolated_input:
                bi_emit_ld_vary(ctx, instr);
                break;
        default:
                /* todo */
                break;
        }
}

static void
emit_instr(bi_context *ctx, struct nir_instr *instr)
{
        switch (instr->type) {
#if 0
        case nir_instr_type_load_const:
                emit_load_const(ctx, nir_instr_as_load_const(instr));
                break;
#endif

        case nir_instr_type_intrinsic:
                emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
                break;

#if 0
        case nir_instr_type_alu:
                emit_alu(ctx, nir_instr_as_alu(instr));
                break;

        case nir_instr_type_tex:
                emit_tex(ctx, nir_instr_as_tex(instr));
                break;
#endif

        case nir_instr_type_jump:
                emit_jump(ctx, nir_instr_as_jump(instr));
                break;

        case nir_instr_type_ssa_undef:
                /* Spurious */
                break;

        default:
                //unreachable("Unhandled instruction type");
                break;
        }
}

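/* Allocates a fresh basic block with an empty predecessor set and a unique
 * name for printing and debugging. */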
static bi_block *
create_empty_block(bi_context *ctx)
{
        bi_block *blk = rzalloc(ctx, bi_block);

        blk->predecessors = _mesa_set_create(blk,
                        _mesa_hash_pointer,
                        _mesa_key_pointer_equal);

        blk->name = ctx->block_name_count++;

        return blk;
}

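/* Wires up a CFG edge: records the successor in the block's fixed-size
 * successor array (skipping duplicates) and adds the block to the successor's
 * predecessor set. */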
static void
bi_block_add_successor(bi_block *block, bi_block *successor)
{
        assert(block);
        assert(successor);

        for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
                if (block->successors[i]) {
                        if (block->successors[i] == successor)
                                return;
                        else
                                continue;
                }

                block->successors[i] = successor;
                _mesa_set_add(successor->predecessors, block);
                return;
        }

        unreachable("Too many successors");
}

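/* Emits a NIR block into BIR, reusing a pre-created after_block (left behind
 * by control-flow emission) when one exists, and appends the result to the
 * shader's block list. */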
static bi_block *
emit_block(bi_context *ctx, nir_block *block)
{
        if (ctx->after_block) {
                ctx->current_block = ctx->after_block;
                ctx->after_block = NULL;
        } else {
                ctx->current_block = create_empty_block(ctx);
        }

        list_addtail(&ctx->current_block->link, &ctx->blocks);
        list_inithead(&ctx->current_block->instructions);

        nir_foreach_instr(instr, block) {
                emit_instr(ctx, instr);
                ++ctx->instruction_count;
        }

        return ctx->current_block;
}

/* Emits an unconditional branch to the end of the current block, returning a
 * pointer so the user can fill in details */

static bi_instruction *
bi_emit_branch(bi_context *ctx)
{
        bi_instruction branch = {
                .type = BI_BRANCH,
                .branch = {
                        .cond = BI_COND_ALWAYS
                }
        };

        return bi_emit(ctx, branch);
}

/* Sets a condition for a branch by examining the NIR condition. If we're
 * familiar with the condition, we unwrap it to fold it into the branch
 * instruction. Otherwise, we consume the condition directly. We
 * generally use 1-bit booleans which allows us to use small types for
 * the conditions.
 */

static void
bi_set_branch_cond(bi_instruction *branch, nir_src *cond, bool invert)
{
        /* TODO: Try to unwrap instead of always bailing */
        branch->src[0] = bir_src_index(cond);
        branch->src[1] = BIR_INDEX_ZERO;
        branch->src_types[0] = branch->src_types[1] = nir_type_uint16;
        branch->branch.cond = invert ? BI_COND_EQ : BI_COND_NE;
}

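/* Emits an if/else as the usual diamond: a conditional branch (with the
 * condition inverted, so taking it skips the then side), the then block, an
 * exit jump past the else block (dropped when the else side is empty), the
 * else block, and a fresh merge block left in ctx->after_block. */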
static void
emit_if(bi_context *ctx, nir_if *nif)
{
        bi_block *before_block = ctx->current_block;

        /* Speculatively emit the branch, but we can't fill it in until later */
        bi_instruction *then_branch = bi_emit_branch(ctx);
        bi_set_branch_cond(then_branch, &nif->condition, true);

        /* Emit the two subblocks. */
        bi_block *then_block = emit_cf_list(ctx, &nif->then_list);
        bi_block *end_then_block = ctx->current_block;

        /* Emit a jump from the end of the then block to the end of the else */
        bi_instruction *then_exit = bi_emit_branch(ctx);

        /* Emit second block, and check if it's empty */

        int count_in = ctx->instruction_count;
        bi_block *else_block = emit_cf_list(ctx, &nif->else_list);
        bi_block *end_else_block = ctx->current_block;
        ctx->after_block = create_empty_block(ctx);

        /* Now that we have the subblocks emitted, fix up the branches */

        assert(then_block);
        assert(else_block);

        if (ctx->instruction_count == count_in) {
                /* The else block is empty, so don't emit an exit jump */
                bi_remove_instruction(then_exit);
                then_branch->branch.target = ctx->after_block;
        } else {
                then_branch->branch.target = else_block;
                then_exit->branch.target = ctx->after_block;
                bi_block_add_successor(end_then_block, then_exit->branch.target);
        }

        /* Wire up the successors */

        bi_block_add_successor(before_block, then_branch->branch.target); /* then_branch */

        bi_block_add_successor(before_block, then_block); /* fallthrough */
        bi_block_add_successor(end_else_block, ctx->after_block); /* fallthrough */
}

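/* Emits a loop: a continue block serving as the back-edge target, a break
 * block that becomes the block after the loop, and an unconditional branch
 * back to the continue block at the end of the body. The previous break and
 * continue targets are saved and restored so loops nest correctly. */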
static void
emit_loop(bi_context *ctx, nir_loop *nloop)
{
        /* Remember where we are */
        bi_block *start_block = ctx->current_block;

        bi_block *saved_break = ctx->break_block;
        bi_block *saved_continue = ctx->continue_block;

        ctx->continue_block = create_empty_block(ctx);
        ctx->break_block = create_empty_block(ctx);
        ctx->after_block = ctx->continue_block;

        /* Emit the body itself */
        emit_cf_list(ctx, &nloop->body);

        /* Branch back to the top of the loop */
        bi_instruction *br_back = bi_emit_branch(ctx);
        br_back->branch.target = ctx->continue_block;
        bi_block_add_successor(start_block, ctx->continue_block);
        bi_block_add_successor(ctx->current_block, ctx->continue_block);

        ctx->after_block = ctx->break_block;

        /* Pop off */
        ctx->break_block = saved_break;
        ctx->continue_block = saved_continue;
        ++ctx->loop_count;
}

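/* Walks a NIR control-flow list, emitting each block, if, and loop in order,
 * and returns the first block emitted so callers can target it. */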
static bi_block *
emit_cf_list(bi_context *ctx, struct exec_list *list)
{
        bi_block *start_block = NULL;

        foreach_list_typed(nir_cf_node, node, node, list) {
                switch (node->type) {
                case nir_cf_node_block: {
                        bi_block *block = emit_block(ctx, nir_cf_node_as_block(node));

                        if (!start_block)
                                start_block = block;

                        break;
                }

                case nir_cf_node_if:
                        emit_if(ctx, nir_cf_node_as_if(node));
                        break;

                case nir_cf_node_loop:
                        emit_loop(ctx, nir_cf_node_as_loop(node));
                        break;

                default:
                        unreachable("Unknown control flow");
                }
        }

        return start_block;
}

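/* Type size callback for nir_lower_io: I/O is counted in attribute slots;
 * the bindless flag does not affect the slot count here. */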
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}

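/* Standard NIR optimization loop, iterated to a fixed point, followed by late
 * algebraic optimizations and conversion out of SSA into registers. flrp
 * lowering runs at most once since nothing should rematerialize flrps. */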
static void
bi_optimize_nir(nir_shader *nir)
{
        bool progress;
        unsigned lower_flrp = 16 | 32 | 64;

        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);

        nir_lower_tex_options lower_tex_options = {
                .lower_txs_lod = true,
                .lower_txp = ~0,
                .lower_tex_without_implicit_lod = true,
                .lower_txd = true,
        };

        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);

        do {
                progress = false;

                NIR_PASS(progress, nir, nir_lower_var_copies);
                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

                NIR_PASS(progress, nir, nir_copy_prop);
                NIR_PASS(progress, nir, nir_opt_remove_phis);
                NIR_PASS(progress, nir, nir_opt_dce);
                NIR_PASS(progress, nir, nir_opt_dead_cf);
                NIR_PASS(progress, nir, nir_opt_cse);
                NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
                NIR_PASS(progress, nir, nir_opt_algebraic);
                NIR_PASS(progress, nir, nir_opt_constant_folding);

                if (lower_flrp != 0) {
                        bool lower_flrp_progress = false;
                        NIR_PASS(lower_flrp_progress,
                                 nir,
                                 nir_lower_flrp,
                                 lower_flrp,
                                 false /* always_precise */,
                                 nir->options->lower_ffma);
                        if (lower_flrp_progress) {
                                NIR_PASS(progress, nir,
                                         nir_opt_constant_folding);
                                progress = true;
                        }

                        /* Nothing should rematerialize any flrps, so we only
                         * need to do this lowering once.
                         */
                        lower_flrp = 0;
                }

                NIR_PASS(progress, nir, nir_opt_undef);
                NIR_PASS(progress, nir, nir_opt_loop_unroll,
                         nir_var_shader_in |
                         nir_var_shader_out |
                         nir_var_function_temp);
        } while (progress);

        NIR_PASS(progress, nir, nir_opt_algebraic_late);

        /* Take us out of SSA */
        NIR_PASS(progress, nir, nir_lower_locals_to_regs);
        NIR_PASS(progress, nir, nir_convert_from_ssa, true);
}

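/* Compiler entry point: runs the NIR lowering and optimization pipeline, then
 * emits BIR for each function body (only a single function is handled so
 * far) and prints the result. Code generation beyond the IR print is still
 * to come. */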
void
bifrost_compile_shader_nir(nir_shader *nir, bifrost_program *program, unsigned product_id)
{
        bi_context *ctx = rzalloc(NULL, bi_context);
        ctx->nir = nir;
        ctx->stage = nir->info.stage;
        ctx->quirks = bifrost_get_quirks(product_id);
        list_inithead(&ctx->blocks);

        /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
         * (so we don't accidentally duplicate the epilogue since mesa/st has
         * messed with our I/O quite a bit already) */

        NIR_PASS_V(nir, nir_lower_vars_to_ssa);

        if (ctx->stage == MESA_SHADER_VERTEX) {
                NIR_PASS_V(nir, nir_lower_viewport_transform);
                NIR_PASS_V(nir, nir_lower_point_size, 1.0, 1024.0);
        }

        NIR_PASS_V(nir, nir_split_var_copies);
        NIR_PASS_V(nir, nir_lower_global_vars_to_local);
        NIR_PASS_V(nir, nir_lower_var_copies);
        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
        NIR_PASS_V(nir, nir_lower_ssbo);

        /* We have to lower ALU to scalar ourselves since viewport
         * transformations produce vector ops */
        NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);

        bi_optimize_nir(nir);
        nir_print_shader(nir, stdout);

        nir_foreach_function(func, nir) {
                if (!func->impl)
                        continue;

                emit_cf_list(ctx, &func->impl->body);
                break; /* TODO: Multi-function shaders */
        }

        bi_print_shader(ctx, stdout);

        ralloc_free(ctx);
}