From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Date: Sat, 23 Nov 2019 21:08:02 +0000 (-0500)
Subject: pan/midgard: Writeout per render target
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=60396340f5b9bef009e8bc34696a981f5e2b3ae2;p=mesa.git

pan/midgard: Writeout per render target

The flow is considerably more complicated. Instead of one writeout loop
like usual, we have a separate write loop for each render target. This
requires some scheduling shenanigans to get right.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
---

diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 8eb1b176e5f..a118f34fd9a 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1330,51 +1330,26 @@ compute_builtin_arg(nir_op op)
 
 /* Emit store for a fragment shader, which is encoded via a fancy branch. TODO:
  * Handle MRT here */
+static void
+emit_fragment_epilogue(compiler_context *ctx, unsigned rt);
 
 static void
 emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
 {
         emit_explicit_constant(ctx, src, src);
 
-        /* If we're doing MRT, we need to specify the render target */
-
-        midgard_instruction rt_move = {
-                .dest = ~0
-        };
-
-        if (rt != 0) {
-                /* We'll write to r1.z */
-                rt_move = v_mov(~0, SSA_FIXED_REGISTER(1));
-                rt_move.mask = 1 << COMPONENT_Z;
-                rt_move.unit = UNIT_SADD;
-
-                /* r1.z = (rt * 0x100) */
-                rt_move.has_inline_constant = true;
-                rt_move.inline_constant = (rt * 0x100);
-
-                /* r1 */
-                ctx->work_registers = MAX2(ctx->work_registers, 1);
-
-                /* Do the write */
-                emit_mir_instruction(ctx, rt_move);
-        }
-
-        /* Next, generate the branch. For R render targets in the writeout, the
-         * i'th render target jumps to pseudo-offset [2(R-1) + i] */
-
-        unsigned outputs = ctx->is_blend ? 1 : ctx->nir->num_outputs;
-        unsigned offset = (2 * (outputs - 1)) + rt;
-
         struct midgard_instruction ins =
-                v_alu_br_compact_cond(midgard_jmp_writeout_op_writeout, TAG_ALU_4, offset, midgard_condition_always);
+                v_alu_br_compact_cond(midgard_jmp_writeout_op_writeout, TAG_ALU_4, 0, midgard_condition_always);
 
         /* Add dependencies */
         ins.src[0] = src;
-        ins.src[1] = rt_move.dest;
+        ins.constants[0] = rt * 0x100;
 
         /* Emit the branch */
         emit_mir_instruction(ctx, ins);
         schedule_barrier(ctx);
+
+        emit_fragment_epilogue(ctx, rt);
 }
 
 static void
@@ -2229,10 +2204,20 @@ midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block)
 }
 
 static void
-emit_fragment_epilogue(compiler_context *ctx)
+emit_fragment_epilogue(compiler_context *ctx, unsigned rt)
 {
-        /* Just emit the last chunk with the branch */
+        /* Include a move to specify the render target */
+
+        if (rt > 0) {
+                midgard_instruction rt_move = v_mov(SSA_FIXED_REGISTER(1),
+                                SSA_FIXED_REGISTER(1));
+                rt_move.mask = 1 << COMPONENT_Z;
+                rt_move.unit = UNIT_SADD;
+                emit_mir_instruction(ctx, rt_move);
+        }
+
         EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, ~0, midgard_condition_always);
+        schedule_barrier(ctx);
 }
 
 static midgard_block *
@@ -2524,19 +2509,6 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
                 ctx->func = func;
 
                 emit_cf_list(ctx, &func->impl->body);
-
-                /* Emit empty exit block with successor */
-
-                struct midgard_block *semi_end = ctx->current_block;
-
-                struct midgard_block *end =
-                        emit_block(ctx, func->impl->end_block);
-
-                if (ctx->stage == MESA_SHADER_FRAGMENT)
-                        emit_fragment_epilogue(ctx);
-
-                midgard_block_add_successor(semi_end, end);
-
                 break; /* TODO: Multi-function shaders */
         }
 
diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c
index c084a896a40..749df2efb67 100644
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -524,10 +524,15 @@ allocate_registers(compiler_context *ctx, bool *spilled)
                 assert(check_read_class(l->class, ins->type, ins->src[2]));
         }
 
-        /* Mark writeout to r0 */
+        /* Mark writeout to r0, render target to r1.z */
         mir_foreach_instr_global(ctx, ins) {
-                if (ins->compact_branch && ins->writeout && ins->src[0] < ctx->temp_count)
+                if (!(ins->compact_branch && ins->writeout)) continue;
+
+                if (ins->src[0] < ctx->temp_count)
                         l->solutions[ins->src[0]] = 0;
+
+                if (ins->src[1] < ctx->temp_count)
+                        l->solutions[ins->src[1]] = (16 * 1) + COMPONENT_Z * 4;
         }
         
         mir_compute_interference(ctx, l);
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index 6d83330a502..592abc7f5e8 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -910,6 +910,19 @@ mir_schedule_alu(
                         unreachable("Bad condition");
         }
 
+        /* If we have a render target reference, schedule a move for it */
+
+        if (branch && branch->writeout && branch->constants[0]) {
+                midgard_instruction mov = v_mov(~0, make_compiler_temp(ctx));
+                sadd = mem_dup(&mov, sizeof(midgard_instruction));
+                sadd->unit = UNIT_SADD;
+                sadd->mask = 0x1;
+                sadd->has_inline_constant = true;
+                sadd->inline_constant = branch->constants[0];
+                branch->src[1] = mov.dest;
+                /* TODO: Don't leak */
+        }
+
         /* Stage 2, let's schedule sadd before vmul for writeout */
         mir_choose_alu(&sadd, instructions, worklist, len, &predicate, UNIT_SADD);