From dff4986b1aa22990828861ccca234b52455cbeb6 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Date: Mon, 12 Aug 2019 12:36:46 -0700
Subject: [PATCH] pan/midgard: Emit store_output branch just-in-time

We'll need multiple branches for MRT, so we can't defer. Also, we need
to track dependencies to ensure r0 is set to the correct value for each
store_output.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
---
 src/panfrost/midgard/compiler.h        |  3 -
 src/panfrost/midgard/midgard_compile.c | 86 +++++++++++++++++++-------
 2 files changed, 62 insertions(+), 27 deletions(-)

diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index 52113dda33f..57c6c06df6d 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -268,9 +268,6 @@ typedef struct compiler_context {
         /* Alpha ref value passed in */
         float alpha_ref;
 
-        /* The index corresponding to the fragment output */
-        unsigned fragment_output;
-
         /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
         unsigned sysvals[MAX_SYSVAL_COUNT];
         unsigned sysval_count;
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 91e257db187..499dbb6e9c3 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1342,6 +1342,64 @@ compute_builtin_arg(nir_op op)
         }
 }
 
+/* Emit store for a fragment shader, which is encoded via a fancy branch. TODO:
+ * Handle MRT here */
+
+static void
+emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
+{
+        /* First, move in whatever we're outputting */
+        midgard_instruction move = v_mov(src, blank_alu_src, SSA_FIXED_REGISTER(0));
+        if (rt != 0) {
+                /* Force a tight schedule. TODO: Make the scheduler MRT aware */
+                move.unit = UNIT_VMUL;
+                move.precede_break = true;
+                move.dont_eliminate = true;
+        }
+
+        emit_mir_instruction(ctx, move);
+
+        /* If we're doing MRT, we need to specify the render target */
+
+        midgard_instruction rt_move = {
+                .ssa_args = {
+                        .dest = -1
+                }
+        };
+
+        if (rt != 0) {
+                /* We'll write to r1.z */
+                rt_move = v_mov(-1, blank_alu_src, SSA_FIXED_REGISTER(1));
+                rt_move.mask = 1 << COMPONENT_Z;
+                rt_move.unit = UNIT_SADD;
+
+                /* r1.z = (rt * 0x100) */
+                rt_move.ssa_args.inline_constant = true;
+                rt_move.inline_constant = (rt * 0x100);
+
+                /* r1 */
+                ctx->work_registers = MAX2(ctx->work_registers, 1);
+
+                /* Do the write */
+                emit_mir_instruction(ctx, rt_move);
+        }
+
+        /* Next, generate the branch. For R render targets in the writeout, the
+         * i'th render target jumps to pseudo-offset [2(R-1) + i] */
+
+        unsigned offset = (2 * (ctx->nir->num_outputs - 1)) + rt;
+
+        struct midgard_instruction ins =
+                v_alu_br_compact_cond(midgard_jmp_writeout_op_writeout, TAG_ALU_4, offset, midgard_condition_always);
+
+        /* Add dependencies */
+        ins.ssa_args.src[0] = move.ssa_args.dest;
+        ins.ssa_args.src[1] = rt_move.ssa_args.dest;
+
+        /* Emit the branch */
+        emit_mir_instruction(ctx, ins);
+}
+
 static void
 emit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr)
 {
@@ -1501,19 +1559,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 reg = nir_src_index(ctx, &instr->src[0]);
 
                 if (ctx->stage == MESA_SHADER_FRAGMENT) {
-                        /* gl_FragColor is not emitted with load/store
-                         * instructions. Instead, it gets plonked into
-                         * r0 at the end of the shader and we do the
-                         * framebuffer writeout dance. TODO: Defer
-                         * writes */
-
-                        midgard_instruction move = v_mov(reg, blank_alu_src, SSA_FIXED_REGISTER(0));
-                        emit_mir_instruction(ctx, move);
-
-                        /* Save the index we're writing to for later reference
-                         * in the epilogue */
-
-                        ctx->fragment_output = reg;
+                        /* Determine number of render targets */
+                        emit_fragment_store(ctx, reg, offset);
                 } else if (ctx->stage == MESA_SHADER_VERTEX) {
                         /* We should have been vectorized, though we don't
                          * currently check that st_vary is emitted only once
@@ -1543,10 +1590,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
         case nir_intrinsic_store_raw_output_pan:
                 assert (ctx->stage == MESA_SHADER_FRAGMENT);
                 reg = nir_src_index(ctx, &instr->src[0]);
-
-                midgard_instruction move = v_mov(reg, blank_alu_src, SSA_FIXED_REGISTER(0));
-                emit_mir_instruction(ctx, move);
-                ctx->fragment_output = reg;
+                emit_fragment_store(ctx, reg, 0);
 
                 break;
 
@@ -2202,13 +2246,7 @@ midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block)
 static void
 emit_fragment_epilogue(compiler_context *ctx)
 {
-        emit_explicit_constant(ctx, ctx->fragment_output, SSA_FIXED_REGISTER(0));
-
-        /* Perform the actual fragment writeout. We have two writeout/branch
-         * instructions, forming a loop until writeout is successful as per the
-         * docs. TODO: gl_FragDepth */
-
-        EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, 0, midgard_condition_always);
+        /* Just emit the last chunk with the branch */
         EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, -1, midgard_condition_always);
 }
 
-- 
2.30.2