pan/midgard: Generate MRT writeout loops
author     Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Thu, 2 Jan 2020 17:27:59 +0000 (12:27 -0500)
committer  Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Thu, 2 Jan 2020 20:20:55 +0000 (15:20 -0500)
They need a very particular form; the naive way we did it before doesn't
look like it's sufficient in practice. So let's follow the rough
structure of the blob's writeout, since this is fixed code anyway.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/panfrost/midgard/compiler.h
src/panfrost/midgard/midgard_compile.c
src/panfrost/midgard/midgard_liveness.c
src/panfrost/midgard/midgard_ra.c
src/panfrost/midgard/midgard_schedule.c

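For readers skimming the diff, here is a minimal, self-contained C sketch of
the control-flow bookkeeping that mir_add_writeout_loops (below) performs for
each render target. The toy_branch struct, MAX_RT, and all block numbers are
hypothetical stand-ins for the real MIR structures, not the actual compiler
API; the real code additionally encodes the render target into the branch's
embedded constant (rt * 0x100) and marks the final bundle so the instruction
prefetch lookahead returns 1.

/* Sketch only: model each RT's writeout branch being retargeted at a freshly
 * emitted epilogue block; non-final epilogues branch back to where the shader
 * left off, and the final epilogue is marked last_writeout. */

#include <stdbool.h>
#include <stdio.h>

#define MAX_RT 4

struct toy_branch {
        bool present;           /* was a writeout emitted for this RT? */
        unsigned target_block;  /* block the branch jumps to */
        bool last_writeout;     /* terminates the shader? */
};

int main(void)
{
        /* Pretend RT 0 and RT 1 both had a writeout emitted, each jumping to
         * the block that followed it at emit time (hypothetical numbers). */
        struct toy_branch writeout_branch[MAX_RT] = {
                [0] = { .present = true, .target_block = 3 },
                [1] = { .present = true, .target_block = 7 },
        };

        unsigned block_count = 8; /* blocks emitted so far (hypothetical) */

        for (unsigned rt = 0; rt < MAX_RT; ++rt) {
                struct toy_branch *br = &writeout_branch[rt];
                if (!br->present) continue;

                /* Where the shader wanted to resume before we hijacked it */
                unsigned popped = br->target_block;

                /* "Emit" the epilogue block for this RT and retarget the
                 * writeout branch at it. */
                unsigned epilogue = block_count++;
                br->target_block = epilogue;
                printf("rt %u: writeout branch -> epilogue block %u\n", rt, epilogue);

                bool more_rts = (rt + 1) < MAX_RT && writeout_branch[rt + 1].present;
                if (more_rts) {
                        /* Restore: jump back so the next RT's writeout is reached */
                        printf("rt %u: epilogue branches back to block %u\n", rt, popped);
                } else {
                        br->last_writeout = true;
                        printf("rt %u: last writeout, shader terminates here\n", rt);
                }
        }

        return 0;
}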
src/panfrost/midgard/compiler.h
index 1ab0d4dd0d12f609917c715bb72258d884659faf..18ff2572b48422d5506360cb64a7e922515e249a 100644
@@ -110,6 +110,7 @@ typedef struct midgard_instruction {
 
         bool compact_branch;
         bool writeout;
+        bool last_writeout;
 
         /* Kind of a hack, but hint against aggressive DCE */
         bool dont_eliminate;
@@ -218,6 +219,7 @@ typedef struct midgard_bundle {
         bool has_embedded_constants;
         float constants[4];
         bool has_blend_constant;
+        bool last_writeout;
 } midgard_bundle;
 
 typedef struct compiler_context {
@@ -303,6 +305,9 @@ typedef struct compiler_context {
 
         /* Model-specific quirk set */
         uint32_t quirks;
+
+        /* Writeout instructions for each render target */
+        midgard_instruction *writeout_branch[4];
 } compiler_context;
 
 /* Per-block live_in/live_out */
src/panfrost/midgard/midgard_compile.c
index 306d63374b154cdc3385dade79334fc66b2ec842..24765f3da2ab8d4c0c353c50406234787716e621 100644
@@ -1331,11 +1331,6 @@ compute_builtin_arg(nir_op op)
         }
 }
 
-/* Emit store for a fragment shader, which is encoded via a fancy branch. TODO:
- * Handle MRT here */
-static void
-emit_fragment_epilogue(compiler_context *ctx, unsigned rt);
-
 static void
 emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
 {
@@ -1353,9 +1348,15 @@ emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
         /* Emit the branch */
         midgard_instruction *br = emit_mir_instruction(ctx, ins);
         schedule_barrier(ctx);
-        br->branch.target_block = ctx->block_count - 1;
 
-        emit_fragment_epilogue(ctx, rt);
+        assert(rt < ARRAY_SIZE(ctx->writeout_branch));
+        assert(!ctx->writeout_branch[rt]);
+        ctx->writeout_branch[rt] = br;
+
+        /* Push our current location = current block count - 1 = where we'll
+         * jump to. Maybe a bit too clever for my own good */
+
+        br->branch.target_block = ctx->block_count - 1;
 }
 
 static void
@@ -2284,28 +2285,20 @@ midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block)
         return progress;
 }
 
-static void
+static unsigned
 emit_fragment_epilogue(compiler_context *ctx, unsigned rt)
 {
-        /* Include a move to specify the render target */
-
-        if (rt > 0) {
-                midgard_instruction rt_move = v_mov(SSA_FIXED_REGISTER(1),
-                                SSA_FIXED_REGISTER(1));
-                rt_move.mask = 1 << COMPONENT_Z;
-                rt_move.unit = UNIT_SADD;
-                emit_mir_instruction(ctx, rt_move);
-        }
-
         /* Loop to ourselves */
 
         struct midgard_instruction ins = v_branch(false, false);
         ins.writeout = true;
         ins.branch.target_block = ctx->block_count - 1;
+        ins.constants[0] = rt * 0x100;
         emit_mir_instruction(ctx, ins);
 
         ctx->current_block->epilogue = true;
         schedule_barrier(ctx);
+        return ins.branch.target_block;
 }
 
 static midgard_block *
@@ -2557,6 +2550,36 @@ pan_format_from_glsl(const struct glsl_type *type)
                 MALI_NR_CHANNELS(4);
 }
 
+/* For each fragment writeout instruction, generate a writeout loop to
+ * associate with it */
+
+static void
+mir_add_writeout_loops(compiler_context *ctx)
+{
+        for (unsigned rt = 0; rt < ARRAY_SIZE(ctx->writeout_branch); ++rt) {
+                midgard_instruction *br = ctx->writeout_branch[rt];
+                if (!br) continue;
+
+                unsigned popped = br->branch.target_block;
+                midgard_block_add_successor(mir_get_block(ctx, popped - 1), ctx->current_block);
+                br->branch.target_block = emit_fragment_epilogue(ctx, rt);
+
+                /* If we have more RTs, we'll need to restore back after our
+                 * loop terminates */
+
+                if ((rt + 1) < ARRAY_SIZE(ctx->writeout_branch) && ctx->writeout_branch[rt + 1]) {
+                        midgard_instruction uncond = v_branch(false, false);
+                        uncond.branch.target_block = popped;
+                        emit_mir_instruction(ctx, uncond);
+                        midgard_block_add_successor(ctx->current_block, mir_get_block(ctx, popped));
+                        schedule_barrier(ctx);
+                } else {
+                        /* We're last, so we can terminate here */
+                        br->last_writeout = true;
+                }
+        }
+}
+
 int
 midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend, unsigned blend_rt, unsigned gpu_id, bool shaderdb)
 {
@@ -2700,6 +2723,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
                 assert(!ins->invert);
         }
 
+        if (ctx->stage == MESA_SHADER_FRAGMENT)
+                mir_add_writeout_loops(ctx);
+
         /* Schedule! */
         schedule_program(ctx);
         mir_ra(ctx);
@@ -2836,22 +2862,14 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
 
         /* Midgard prefetches instruction types, so during emission we
          * need to lookahead. Unless this is the last instruction, in
-         * which we return 1. Or if this is the second to last and the
-         * last is an ALU, then it's also 1... */
+         * which we return 1. */
 
         mir_foreach_block(ctx, block) {
                 mir_foreach_bundle_in_block(block, bundle) {
                         int lookahead = 1;
 
-                        if (current_bundle + 1 < bundle_count) {
-                                uint8_t next = source_order_bundles[current_bundle + 1]->tag;
-
-                                if (!(current_bundle + 2 < bundle_count) && IS_ALU(next)) {
-                                        lookahead = 1;
-                                } else {
-                                        lookahead = next;
-                                }
-                        }
+                        if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
+                                lookahead = source_order_bundles[current_bundle + 1]->tag;
 
                         emit_binary_bundle(ctx, bundle, compiled, lookahead);
                         ++current_bundle;
src/panfrost/midgard/midgard_liveness.c
index 8627e01fa740801c2b902038e8094058df954f3b..b1b2f311ffafdcf76a7809589185fe5c9306ad44 100644
@@ -153,14 +153,20 @@ mir_compute_liveness(compiler_context *ctx)
 
                 /* If we made progress, we need to process the predecessors */
 
-                if (progress || (blk == exit) || blk->epilogue) {
+                if (progress || !blk->visited) {
                         mir_foreach_predecessor(blk, pred)
                                 _mesa_set_add(work_list, pred);
                 }
+
+                blk->visited = true;
         } while((cur = _mesa_set_next_entry(work_list, NULL)) != NULL);
 
         /* Liveness is now valid */
         ctx->metadata |= MIDGARD_METADATA_LIVENESS;
+
+        mir_foreach_block(ctx, block) {
+                block->visited = false;
+        }
 }
 
 /* Once liveness data is no longer valid, call this */
src/panfrost/midgard/midgard_ra.c
index 65e4d2462823c162e4820a7deb50034b4ec23fc6..92be82fe7b84c7ef3060fa74ef6326f2f721a642 100644
@@ -380,6 +380,27 @@ mir_compute_interference(
         /* First, we need liveness information to be computed per block */
         mir_compute_liveness(ctx);
 
+        /* We need to force r1.w live throughout a blend shader */
+
+        if (ctx->is_blend) {
+                unsigned r1w = ~0;
+
+                mir_foreach_block(ctx, block) {
+                        mir_foreach_instr_in_block_rev(block, ins) {
+                                if (ins->writeout)
+                                        r1w = ins->src[2];
+                        }
+
+                        if (r1w != ~0)
+                                break;
+                }
+
+                mir_foreach_instr_global(ctx, ins) {
+                        if (ins->dest < ctx->temp_count)
+                                lcra_add_node_interference(l, ins->dest, mir_bytemask(ins), r1w, 0xF);
+                }
+        }
+
         /* Now that every block has live_in/live_out computed, we can determine
          * interference by walking each block linearly. Take live_out at the
          * end of each block and walk the block backwards. */
src/panfrost/midgard/midgard_schedule.c
index e0425fd057800fb638793fa2c093fb473b7fb402..46e1f7a4a3525428080dd6a6f0d7d619ab28d1e2 100644
@@ -890,6 +890,9 @@ mir_schedule_alu(
                 mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT);
 
         if (writeout) {
+                /* Propagate up */
+                bundle.last_writeout = branch->last_writeout;
+
                 midgard_instruction add = v_mov(~0, make_compiler_temp(ctx));
 
                 if (!ctx->is_blend) {
@@ -938,7 +941,7 @@ mir_schedule_alu(
 
         /* If we have a render target reference, schedule a move for it */
 
-        if (branch && branch->writeout && branch->constants[0]) {
+        if (branch && branch->writeout && (branch->constants[0] || ctx->is_blend)) {
                 midgard_instruction mov = v_mov(~0, make_compiler_temp(ctx));
                 sadd = mem_dup(&mov, sizeof(midgard_instruction));
                 sadd->unit = UNIT_SADD;