pan/midgard: Handle fragment writeout in RA

author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>

Fri, 30 Aug 2019 18:06:33 +0000 (11:06 -0700)

committer Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>

Fri, 30 Aug 2019 22:50:27 +0000 (15:50 -0700)
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Fri, 30 Aug 2019 18:06:33 +0000 (11:06 -0700)
committer Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Fri, 30 Aug 2019 22:50:27 +0000 (15:50 -0700)
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h

index 00c6b52413aee7e244e0836e41fbdb6188603ae7..82ccde658e3a8cdb04b9ced0aa3c8e83ac785a15 100644 (file)
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -587,13 +587,14 @@ struct ra_graph;
  /* Broad types of register classes so we can handle special
   * registers */
  
-#define NR_REG_CLASSES 5
+#define NR_REG_CLASSES 6
  
  #define REG_CLASS_WORK          0
  #define REG_CLASS_LDST          1
  #define REG_CLASS_LDST27        2
  #define REG_CLASS_TEXR          3
  #define REG_CLASS_TEXW          4
+#define REG_CLASS_FRAGC         5
  
  void mir_lower_special_reads(compiler_context *ctx);
  struct ra_graph* allocate_registers(compiler_context *ctx, bool *spilled);
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c

index 158f89a23c35e9da2cb1b1706f0cb6c7a16fa592..eec3e8d56edacb62d8d4c16ac0aba72d6ea46f51 100644 (file)
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1379,16 +1379,7 @@ compute_builtin_arg(nir_op op)
  static void
  emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
  {
-        /* First, move in whatever we're outputting */
-        midgard_instruction move = v_mov(src, blank_alu_src, SSA_FIXED_REGISTER(0));
-        if (rt != 0) {
-                /* Force a tight schedule. TODO: Make the scheduler MRT aware */
-                move.unit = UNIT_VMUL;
-                move.precede_break = true;
-                move.dont_eliminate = true;
-        }
-
-        emit_mir_instruction(ctx, move);
+        emit_explicit_constant(ctx, src, src);
  
          /* If we're doing MRT, we need to specify the render target */
  
@@ -1974,6 +1965,7 @@ inline_alu_constants(compiler_context *ctx, midgard_block *block)
          mir_foreach_instr_in_block(block, alu) {
                  /* Other instructions cannot inline constants */
                  if (alu->type != TAG_ALU_4) continue;
+                if (alu->compact_branch) continue;
  
                  /* If there is already a constant here, we can do nothing */
                  if (alu->has_constants) continue;
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h

index b5fbb3de6ed4f23153d3506b4a2d9e1bafab4f4f..bf512a0ca5930457a8e0e1393632542ab331ec92 100644 (file)
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -42,10 +42,10 @@ struct midgard_screen {
          struct ra_regs *regs[9];
  
          /* Work register classes corresponds to the above register sets. 20 per
-         * set for 4 classes per work/ldst/ldst27/texr/texw. TODO: Unify with
+         * set for 4 classes per work/ldst/ldst27/texr/texw, plus 4 for fragc. TODO: Unify with
           * compiler.h */
  
-        unsigned reg_classes[9][4 * 5];
+        unsigned reg_classes[9][5 * 5];
  };
  
  /* Define the general compiler entry point */
diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c

index c19c6674f57b6906a907fb8414f5458ae11a93e5..11bef79a42c6c8d6c4e9f49b36d0efeec2ef81c5 100644 (file)
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -49,6 +49,7 @@
  /* We have overlapping register classes for special registers, handled via
   * shadows */
  
+#define SHADOW_R0  17
  #define SHADOW_R28 18
  #define SHADOW_R29 19
  
@@ -159,6 +160,8 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, unsigned reg)
  
          if (phys >= SHADOW_R28 && phys <= SHADOW_R29)
                  phys += 28 - SHADOW_R28;
+        else if (phys == SHADOW_R0)
+                phys = 0;
  
          struct phys_reg r = {
                  .reg = phys,
@@ -180,12 +183,12 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, unsigned reg)
   * special register allocation */
  
  static void
-add_shadow_conflicts (struct ra_regs *regs, unsigned base, unsigned shadow)
+add_shadow_conflicts (struct ra_regs *regs, unsigned base, unsigned shadow, unsigned shadow_count)
  {
          for (unsigned a = 0; a < WORK_STRIDE; ++a) {
                  unsigned reg_a = (WORK_STRIDE * base) + a;
  
-                for (unsigned b = 0; b < WORK_STRIDE; ++b) {
+                for (unsigned b = 0; b < shadow_count; ++b) {
                          unsigned reg_b = (WORK_STRIDE * shadow) + b;
  
                          ra_add_reg_conflict(regs, reg_a, reg_b);
@@ -202,7 +205,7 @@ create_register_set(unsigned work_count, unsigned *classes)
          /* First, initialize the RA */
          struct ra_regs *regs = ra_alloc_reg_set(NULL, virtual_count, true);
  
-        for (unsigned c = 0; c < NR_REG_CLASSES; ++c) {
+        for (unsigned c = 0; c < (NR_REG_CLASSES - 1); ++c) {
                  int work_vec4 = ra_alloc_reg_class(regs);
                  int work_vec3 = ra_alloc_reg_class(regs);
                  int work_vec2 = ra_alloc_reg_class(regs);
@@ -253,10 +256,18 @@ create_register_set(unsigned work_count, unsigned *classes)
                  }
          }
  
+        int fragc = ra_alloc_reg_class(regs);
+
+        classes[4*REG_CLASS_FRAGC + 0] = fragc;
+        classes[4*REG_CLASS_FRAGC + 1] = fragc;
+        classes[4*REG_CLASS_FRAGC + 2] = fragc;
+        classes[4*REG_CLASS_FRAGC + 3] = fragc;
+        ra_class_add_reg(regs, fragc, WORK_STRIDE * SHADOW_R0);
  
          /* We have duplicate classes */
-        add_shadow_conflicts(regs, 28, SHADOW_R28);
-        add_shadow_conflicts(regs, 29, SHADOW_R29);
+        add_shadow_conflicts(regs,  0, SHADOW_R0,  1);
+        add_shadow_conflicts(regs, 28, SHADOW_R28, WORK_STRIDE);
+        add_shadow_conflicts(regs, 29, SHADOW_R29, WORK_STRIDE);
  
          /* We're done setting up */
          ra_set_finalize(regs, NULL);
@@ -399,6 +410,7 @@ mir_lower_special_reads(compiler_context *ctx)
  
          unsigned *alur = calloc(sz, 1);
          unsigned *aluw = calloc(sz, 1);
+        unsigned *brar = calloc(sz, 1);
          unsigned *ldst = calloc(sz, 1);
          unsigned *texr = calloc(sz, 1);
          unsigned *texw = calloc(sz, 1);
@@ -412,6 +424,10 @@ mir_lower_special_reads(compiler_context *ctx)
                          mark_node_class(alur, ins->src[0]);
                          mark_node_class(alur, ins->src[1]);
                          mark_node_class(alur, ins->src[2]);
+
+                        if (ins->compact_branch && ins->writeout)
+                                mark_node_class(brar, ins->src[0]);
+
                          break;
  
                  case TAG_LOAD_STORE_4:
@@ -443,6 +459,7 @@ mir_lower_special_reads(compiler_context *ctx)
          for (unsigned i = 0; i < ctx->temp_count; ++i) {
                  bool is_alur = BITSET_TEST(alur, i);
                  bool is_aluw = BITSET_TEST(aluw, i);
+                bool is_brar = BITSET_TEST(brar, i);
                  bool is_ldst = BITSET_TEST(ldst, i);
                  bool is_texr = BITSET_TEST(texr, i);
                  bool is_texw = BITSET_TEST(texw, i);
@@ -457,7 +474,8 @@ mir_lower_special_reads(compiler_context *ctx)
                          (is_alur && (is_ldst || is_texr)) ||
                          (is_ldst && (is_alur || is_texr || is_texw)) ||
                          (is_texr && (is_alur || is_ldst || is_texw)) ||
-                        (is_texw && (is_aluw || is_ldst || is_texr));
+                        (is_texw && (is_aluw || is_ldst || is_texr)) ||
+                        (is_brar && is_texw);
          
                  if (!collision)
                          continue;
@@ -465,8 +483,8 @@ mir_lower_special_reads(compiler_context *ctx)
                  /* Use the index as-is as the work copy. Emit copies for
                   * special uses */
  
-                unsigned classes[] = { TAG_LOAD_STORE_4, TAG_TEXTURE_4, TAG_TEXTURE_4 };
-                bool collisions[] = { is_ldst, is_texr, is_texw && is_aluw };
+                unsigned classes[] = { TAG_LOAD_STORE_4, TAG_TEXTURE_4, TAG_TEXTURE_4, TAG_ALU_4};
+                bool collisions[] = { is_ldst, is_texr, is_texw && is_aluw, is_brar };
  
                  for (unsigned j = 0; j < ARRAY_SIZE(collisions); ++j) {
                          if (!collisions[j]) continue;
@@ -517,6 +535,7 @@ mir_lower_special_reads(compiler_context *ctx)
  
          free(alur);
          free(aluw);
+        free(brar);
          free(ldst);
          free(texr);
          free(texw);
@@ -766,6 +785,12 @@ allocate_registers(compiler_context *ctx, bool *spilled)
                  assert(check_read_class(found_class, ins->type, ins->src[2]));
          }
  
+        /* Mark writeout to r0 */
+        mir_foreach_instr_global(ctx, ins) {
+                if (ins->compact_branch && ins->writeout)
+                        set_class(found_class, ins->src[0], REG_CLASS_FRAGC);
+        }
+
          for (unsigned i = 0; i < ctx->temp_count; ++i) {
                  unsigned class = found_class[i];
                  ra_set_node_class(g, i, classes[class]);
diff --git a/src/panfrost/midgard/midgard_ra_pipeline.c b/src/panfrost/midgard/midgard_ra_pipeline.c

index feb457de0f9ad2665b5eab07b160ed34ab4362cf..afbcf5b64a012460d935a5f58b46375a8efff79b 100644 (file)
--- a/src/panfrost/midgard/midgard_ra_pipeline.c
+++ b/src/panfrost/midgard/midgard_ra_pipeline.c
@@ -60,6 +60,13 @@ mir_pipeline_ins(
          for (unsigned i = 0; i < bundle->instruction_count; ++i) {
                  midgard_instruction *q = bundle->instructions[i];
                  read_mask |= mir_mask_of_read_components(q, node);
+
+                /* The fragment colour can't be pipelined (well, it is
+                 * pipelined in r0, but this is a delicate dance with
+                 * scheduling and RA, not for us to worry about) */
+
+                if (q->compact_branch && q->writeout && mir_has_arg(q, node))
+                        return false;
          }
  
          /* Now analyze for a write mask */
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c

index 7b687766491d769d537e40ae07d7a769addc5f89..6693a1b725bff62e6d43827d5c64812f5563aa70 100644 (file)
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -160,7 +160,7 @@ midgard_has_hazard(
   */
  
  static bool
-can_writeout_fragment(compiler_context *ctx, midgard_instruction **bundle, unsigned count, unsigned node_count)
+can_writeout_fragment(compiler_context *ctx, midgard_instruction **bundle, unsigned count, unsigned node_count, unsigned r0)
  {
          /* First scan for which components of r0 are written out. Initially
           * none are written */
@@ -176,7 +176,7 @@ can_writeout_fragment(compiler_context *ctx, midgard_instruction **bundle, unsig
          for (unsigned i = 0; i < count; ++i) {
                  midgard_instruction *ins = bundle[i];
  
-                if (ins->dest != SSA_FIXED_REGISTER(0))
+                if (ins->dest != r0)
                          continue;
  
                  /* Record written out mask */
@@ -516,7 +516,7 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction
                                  /* All of r0 has to be written out along with
                                   * the branch writeout */
  
-                                if (ains->writeout && !can_writeout_fragment(ctx, scheduled, index, ctx->temp_count)) {
+                                if (ains->writeout && !can_writeout_fragment(ctx, scheduled, index, ctx->temp_count, ains->src[0])) {
                                          /* We only work on full moves
                                           * at the beginning. We could
                                           * probably do better */
author	Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
	Fri, 30 Aug 2019 18:06:33 +0000 (11:06 -0700)
committer	Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
	Fri, 30 Aug 2019 22:50:27 +0000 (15:50 -0700)
src/panfrost/midgard/compiler.h		patch | blob | history
src/panfrost/midgard/midgard_compile.c		patch | blob | history
src/panfrost/midgard/midgard_compile.h		patch | blob | history
src/panfrost/midgard/midgard_ra.c		patch | blob | history
src/panfrost/midgard/midgard_ra_pipeline.c		patch | blob | history
src/panfrost/midgard/midgard_schedule.c		patch | blob | history