pan/midgard: Generate MRT writeout loops
author     Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Thu, 2 Jan 2020 17:27:59 +0000 (12:27 -0500)
committer  Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Thu, 2 Jan 2020 20:20:55 +0000 (15:20 -0500)
They need a very particular form; the naive way we did it before doesn't
look like it's sufficient in practice. So let's follow the rough
structure of the blob's writeout, since this is fixed code anyway.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/panfrost/midgard/compiler.h
src/panfrost/midgard/midgard_compile.c
src/panfrost/midgard/midgard_liveness.c
src/panfrost/midgard/midgard_ra.c
src/panfrost/midgard/midgard_schedule.c

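For readers skimming the diff, here is a minimal, self-contained C sketch of
the control-flow bookkeeping that mir_add_writeout_loops (below) performs for
each render target. The toy_branch struct, MAX_RT, and all block numbers are
hypothetical stand-ins for the real MIR structures, not the actual compiler
API; the real code additionally encodes the render target into the branch's
embedded constant (rt * 0x100) and marks the final bundle so the instruction
prefetch lookahead returns 1.

/* Sketch only: model each RT's writeout branch being retargeted at a freshly
 * emitted epilogue block; non-final epilogues branch back to where the shader
 * left off, and the final epilogue is marked last_writeout. */

#include <stdbool.h>
#include <stdio.h>

#define MAX_RT 4

struct toy_branch {
        bool present;           /* was a writeout emitted for this RT? */
        unsigned target_block;  /* block the branch jumps to */
        bool last_writeout;     /* terminates the shader? */
};

int main(void)
{
        /* Pretend RT 0 and RT 1 both had a writeout emitted, each jumping to
         * the block that followed it at emit time (hypothetical numbers). */
        struct toy_branch writeout_branch[MAX_RT] = {
                [0] = { .present = true, .target_block = 3 },
                [1] = { .present = true, .target_block = 7 },
        };

        unsigned block_count = 8; /* blocks emitted so far (hypothetical) */

        for (unsigned rt = 0; rt < MAX_RT; ++rt) {
                struct toy_branch *br = &writeout_branch[rt];
                if (!br->present) continue;

                /* Where the shader wanted to resume before we hijacked it */
                unsigned popped = br->target_block;

                /* "Emit" the epilogue block for this RT and retarget the
                 * writeout branch at it. */
                unsigned epilogue = block_count++;
                br->target_block = epilogue;
                printf("rt %u: writeout branch -> epilogue block %u\n", rt, epilogue);

                bool more_rts = (rt + 1) < MAX_RT && writeout_branch[rt + 1].present;
                if (more_rts) {
                        /* Restore: jump back so the next RT's writeout is reached */
                        printf("rt %u: epilogue branches back to block %u\n", rt, popped);
                } else {
                        br->last_writeout = true;
                        printf("rt %u: last writeout, shader terminates here\n", rt);
                }
        }

        return 0;
}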
src/panfrost/midgard/compiler.h
index 1ab0d4dd0d12f609917c715bb72258d884659faf..18ff2572b48422d5506360cb64a7e922515e249a 100644
@@ -110,6 +110,7 @@ typedef struct midgard_instruction {
 
         bool compact_branch;
         bool writeout;
+        bool last_writeout;
 
         /* Kind of a hack, but hint against aggressive DCE */
         bool dont_eliminate;
@@ -218,6 +219,7 @@ typedef struct midgard_bundle {
         bool has_embedded_constants;
         float constants[4];
         bool has_blend_constant;
+        bool last_writeout;
 } midgard_bundle;
 
 typedef struct compiler_context {
@@ -303,6 +305,9 @@ typedef struct compiler_context {
 
         /* Model-specific quirk set */
         uint32_t quirks;
+
+        /* Writeout instructions for each render target */
+        midgard_instruction *writeout_branch[4];
 } compiler_context;
 
 /* Per-block live_in/live_out */
src/panfrost/midgard/midgard_compile.c
index 306d63374b154cdc3385dade79334fc66b2ec842..24765f3da2ab8d4c0c353c50406234787716e621 100644
@@ -1331,11 +1331,6 @@ compute_builtin_arg(nir_op op)
         }
 }
 
-/* Emit store for a fragment shader, which is encoded via a fancy branch. TODO:
- * Handle MRT here */
-static void
-emit_fragment_epilogue(compiler_context *ctx, unsigned rt);
-
 static void
 emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
 {
@@ -1353,9 +1348,15 @@ emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
         /* Emit the branch */
         midgard_instruction *br = emit_mir_instruction(ctx, ins);
         schedule_barrier(ctx);
-        br->branch.target_block = ctx->block_count - 1;
 
-        emit_fragment_epilogue(ctx, rt);
+        assert(rt < ARRAY_SIZE(ctx->writeout_branch));
+        assert(!ctx->writeout_branch[rt]);
+        ctx->writeout_branch[rt] = br;
+
+        /* Push our current location = current block count - 1 = where we'll
+         * jump to. Maybe a bit too clever for my own good */
+
+        br->branch.target_block = ctx->block_count - 1;
 }
 
 static void
@@ -2284,28 +2285,20 @@ midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block)
         return progress;
 }
 
-static void
+static unsigned
 emit_fragment_epilogue(compiler_context *ctx, unsigned rt)
 {
-        /* Include a move to specify the render target */
-
-        if (rt > 0) {
-                midgard_instruction rt_move = v_mov(SSA_FIXED_REGISTER(1),
-                                SSA_FIXED_REGISTER(1));
-                rt_move.mask = 1 << COMPONENT_Z;
-                rt_move.unit = UNIT_SADD;
-                emit_mir_instruction(ctx, rt_move);
-        }
-
         /* Loop to ourselves */
 
         struct midgard_instruction ins = v_branch(false, false);
         ins.writeout = true;
         ins.branch.target_block = ctx->block_count - 1;
+        ins.constants[0] = rt * 0x100;
         emit_mir_instruction(ctx, ins);
 
         ctx->current_block->epilogue = true;
         schedule_barrier(ctx);
+        return ins.branch.target_block;
 }
 
 static midgard_block *
@@ -2557,6 +2550,36 @@ pan_format_from_glsl(const struct glsl_type *type)
                 MALI_NR_CHANNELS(4);
 }
 
+/* For each fragment writeout instruction, generate a writeout loop to
+ * associate with it */
+
+static void
+mir_add_writeout_loops(compiler_context *ctx)
+{
+        for (unsigned rt = 0; rt < ARRAY_SIZE(ctx->writeout_branch); ++rt) {
+                midgard_instruction *br = ctx->writeout_branch[rt];
+                if (!br) continue;
+
+                unsigned popped = br->branch.target_block;
+                midgard_block_add_successor(mir_get_block(ctx, popped - 1), ctx->current_block);
+                br->branch.target_block = emit_fragment_epilogue(ctx, rt);
+
+                /* If we have more RTs, we'll need to restore back after our
+                 * loop terminates */
+
+                if ((rt + 1) < ARRAY_SIZE(ctx->writeout_branch) && ctx->writeout_branch[rt + 1]) {
+                        midgard_instruction uncond = v_branch(false, false);
+                        uncond.branch.target_block = popped;
+                        emit_mir_instruction(ctx, uncond);
+                        midgard_block_add_successor(ctx->current_block, mir_get_block(ctx, popped));
+                        schedule_barrier(ctx);
+                } else {
+                        /* We're last, so we can terminate here */
+                        br->last_writeout = true;
+                }
+        }
+}
+
 int
 midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend, unsigned blend_rt, unsigned gpu_id, bool shaderdb)
 {
@@ -2700,6 +2723,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
                 assert(!ins->invert);
         }
 
+        if (ctx->stage == MESA_SHADER_FRAGMENT)
+                mir_add_writeout_loops(ctx);
+
         /* Schedule! */
         schedule_program(ctx);
         mir_ra(ctx);
@@ -2836,22 +2862,14 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
 
         /* Midgard prefetches instruction types, so during emission we
          * need to lookahead. Unless this is the last instruction, in
-         * which we return 1. Or if this is the second to last and the
-         * last is an ALU, then it's also 1... */
+         * which we return 1. */
 
         mir_foreach_block(ctx, block) {
                 mir_foreach_bundle_in_block(block, bundle) {
                         int lookahead = 1;
 
-                        if (current_bundle + 1 < bundle_count) {
-                                uint8_t next = source_order_bundles[current_bundle + 1]->tag;
-
-                                if (!(current_bundle + 2 < bundle_count) && IS_ALU(next)) {
-                                        lookahead = 1;
-                                } else {
-                                        lookahead = next;
-                                }
-                        }
+                        if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
+                                lookahead = source_order_bundles[current_bundle + 1]->tag;
 
                         emit_binary_bundle(ctx, bundle, compiled, lookahead);
                         ++current_bundle;
src/panfrost/midgard/midgard_liveness.c
index 8627e01fa740801c2b902038e8094058df954f3b..b1b2f311ffafdcf76a7809589185fe5c9306ad44 100644
@@ -153,14 +153,20 @@ mir_compute_liveness(compiler_context *ctx)
 
                 /* If we made progress, we need to process the predecessors */
 
-                if (progress || (blk == exit) || blk->epilogue) {
+                if (progress || !blk->visited) {
                         mir_foreach_predecessor(blk, pred)
                                 _mesa_set_add(work_list, pred);
                 }
+
+                blk->visited = true;
         } while((cur = _mesa_set_next_entry(work_list, NULL)) != NULL);
 
         /* Liveness is now valid */
         ctx->metadata |= MIDGARD_METADATA_LIVENESS;
+
+        mir_foreach_block(ctx, block) {
+                block->visited = false;
+        }
 }
 
 /* Once liveness data is no longer valid, call this */
src/panfrost/midgard/midgard_ra.c
index 65e4d2462823c162e4820a7deb50034b4ec23fc6..92be82fe7b84c7ef3060fa74ef6326f2f721a642 100644
@@ -380,6 +380,27 @@ mir_compute_interference(
         /* First, we need liveness information to be computed per block */
         mir_compute_liveness(ctx);
 
+        /* We need to force r1.w live throughout a blend shader */
+
+        if (ctx->is_blend) {
+                unsigned r1w = ~0;
+
+                mir_foreach_block(ctx, block) {
+                        mir_foreach_instr_in_block_rev(block, ins) {
+                                if (ins->writeout)
+                                        r1w = ins->src[2];
+                        }
+
+                        if (r1w != ~0)
+                                break;
+                }
+
+                mir_foreach_instr_global(ctx, ins) {
+                        if (ins->dest < ctx->temp_count)
+                                lcra_add_node_interference(l, ins->dest, mir_bytemask(ins), r1w, 0xF);
+                }
+        }
+
         /* Now that every block has live_in/live_out computed, we can determine
          * interference by walking each block linearly. Take live_out at the
          * end of each block and walk the block backwards. */
src/panfrost/midgard/midgard_schedule.c
index e0425fd057800fb638793fa2c093fb473b7fb402..46e1f7a4a3525428080dd6a6f0d7d619ab28d1e2 100644
@@ -890,6 +890,9 @@ mir_schedule_alu(
                 mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT);
 
         if (writeout) {
+                /* Propagate up */
+                bundle.last_writeout = branch->last_writeout;
+
                 midgard_instruction add = v_mov(~0, make_compiler_temp(ctx));
 
                 if (!ctx->is_blend) {
@@ -938,7 +941,7 @@ mir_schedule_alu(
 
         /* If we have a render target reference, schedule a move for it */
 
-        if (branch && branch->writeout && branch->constants[0]) {
+        if (branch && branch->writeout && (branch->constants[0] || ctx->is_blend)) {
                 midgard_instruction mov = v_mov(~0, make_compiler_temp(ctx));
                 sadd = mem_dup(&mov, sizeof(midgard_instruction));
                 sadd->unit = UNIT_SADD;