bool compact_branch;
bool writeout;
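+ /* For writeout branches, whether depth and/or stencil is written */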
+ bool writeout_depth;
+ bool writeout_stencil;
bool last_writeout;
/* Kind of a hack, but hint against aggressive DCE */
MIDGARD_COLOR_RT1,
MIDGARD_COLOR_RT2,
MIDGARD_COLOR_RT3,
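+ /* Pseudo render target for combined depth/stencil writeout */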
+ MIDGARD_ZS_RT,
MIDGARD_NUM_RTS,
};
}
static void
-emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
+emit_fragment_store(compiler_context *ctx, unsigned src, enum midgard_rt_id rt)
{
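+ /* Only one writeout branch is allowed per render target */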
+ assert(rt < ARRAY_SIZE(ctx->writeout_branch));
+
+ midgard_instruction *br = ctx->writeout_branch[rt];
+
+ assert(!br);
+
emit_explicit_constant(ctx, src, src);
struct midgard_instruction ins =
/* Add dependencies */
ins.src[0] = src;
- ins.constants.u32[0] = rt * 0x100;
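+ /* The branch constant selects the render target: colour RTs use
+ * rt * 0x100, while 0xFF tags a depth/stencil writeout */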
+ ins.constants.u32[0] = rt == MIDGARD_ZS_RT ?
+ 0xFF : (rt - MIDGARD_COLOR_RT0) * 0x100;
/* Emit the branch */
- midgard_instruction *br = emit_mir_instruction(ctx, ins);
+ br = emit_mir_instruction(ctx, ins);
schedule_barrier(ctx);
-
- assert(rt < ARRAY_SIZE(ctx->writeout_branch));
- assert(!ctx->writeout_branch[rt]);
ctx->writeout_branch[rt] = br;
/* Push our current location = current block count - 1 = where we'll
break;
}
+ case nir_intrinsic_store_zs_output_pan: {
+ assert(ctx->stage == MESA_SHADER_FRAGMENT);
+ emit_fragment_store(ctx, nir_src_index(ctx, &instr->src[0]),
+ MIDGARD_ZS_RT);
+
+ midgard_instruction *br = ctx->writeout_branch[MIDGARD_ZS_RT];
+
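+ /* Component 0 holds depth; stencil is either in component 1 or
+ * packed as the second of two components */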
+ if (!nir_intrinsic_component(instr))
+ br->writeout_depth = true;
+ if (nir_intrinsic_component(instr) ||
+ instr->num_components > 1)
+ br->writeout_stencil = true;
+ assert(br->writeout_depth | br->writeout_stencil);
+ break;
+ }
+
case nir_intrinsic_store_output:
assert(nir_src_is_const(instr->src[1]) && "no indirect outputs");
emit_fragment_epilogue(compiler_context *ctx, unsigned rt)
{
/* Loop to ourselves */
-
+ midgard_instruction *br = ctx->writeout_branch[rt];
struct midgard_instruction ins = v_branch(false, false);
ins.writeout = true;
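+ /* Inherit the depth/stencil flags and RT selector from the
+ * original writeout branch */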
+ ins.writeout_depth = br->writeout_depth;
+ ins.writeout_stencil = br->writeout_stencil;
ins.branch.target_block = ctx->block_count - 1;
- ins.constants.u32[0] = rt * 0x100;
+ ins.constants.u32[0] = br->constants.u32[0];
emit_mir_instruction(ctx, ins);
ctx->current_block->epilogue = true;
ctx->stage = nir->info.stage;
ctx->is_blend = is_blend;
ctx->alpha_ref = program->alpha_ref;
- ctx->blend_rt = blend_rt;
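+ /* blend_rt is a colour RT index; bias it so it indexes into
+ * midgard_rt_id */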
+ ctx->blend_rt = MIDGARD_COLOR_RT0 + blend_rt;
ctx->quirks = midgard_get_quirks(gpu_id);
/* Start off with a safe cutoff, allowing usage of all 16 work
mir_foreach_instr_global(ctx, ins) {
if (!(ins->compact_branch && ins->writeout)) continue;
- if (ins->src[0] < ctx->temp_count)
- l->solutions[ins->src[0]] = 0;
+ if (ins->src[0] < ctx->temp_count) {
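+ /* Depth lands in r1.x and stencil in r1.y; colour writeout
+ * sources r0 */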
+ if (ins->writeout_depth)
+ l->solutions[ins->src[0]] = (16 * 1) + COMPONENT_X * 4;
+ else if (ins->writeout_stencil)
+ l->solutions[ins->src[0]] = (16 * 1) + COMPONENT_Y * 4;
+ else
+ l->solutions[ins->src[0]] = 0;
+ }
if (ins->src[1] < ctx->temp_count)
l->solutions[ins->src[1]] = (16 * 1) + COMPONENT_Z * 4;
mir_choose_alu(&branch, instructions, worklist, len, &predicate, ALU_ENAB_BR_COMPACT);
mir_update_worklist(worklist, len, instructions, branch);
bool writeout = branch && branch->writeout;
+ bool zs_writeout = writeout && (branch->writeout_depth | branch->writeout_stencil);
if (branch && branch->branch.conditional) {
midgard_instruction *cond = mir_schedule_condition(ctx, &predicate, worklist, len, instructions, branch);
mir_choose_alu(&smul, instructions, worklist, len, &predicate, UNIT_SMUL);
- if (!writeout)
+ if (!writeout) {
mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT);
-
- if (writeout) {
+ } else {
/* Propagate up */
bundle.last_writeout = branch->last_writeout;
+ }
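+
+ /* Colour writeout stages its source through a move into r0;
+ * depth/stencil writeout sources r1 and needs no staging */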
+ if (writeout && !zs_writeout) {
vadd = ralloc(ctx, midgard_instruction);
*vadd = v_mov(~0, make_compiler_temp(ctx));
/* Check if writeout reads its own register */
- if (branch && branch->writeout) {
+ if (writeout) {
midgard_instruction *stages[] = { sadd, vadd, smul };
- unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : branch->src[0];
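+ /* Absent an explicit source, writeout reads the fixed register:
+ * r0 for colour, r1 for depth/stencil */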
+ unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(zs_writeout ? 1 : 0) : branch->src[0];
unsigned writeout_mask = 0x0;
bool bad_writeout = false;
}
/* It's possible we'll be able to schedule something into vmul
- * to fill r0. Let's peak into the future, trying to schedule
+ * to fill r0/r1. Let's peek into the future, trying to schedule
* vmul specially that way. */
- if (!bad_writeout && writeout_mask != 0xF) {
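+ /* Colour writeout needs all of rgba, while depth/stencil only
+ * needs one component per value written */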
+ unsigned full_mask = zs_writeout ?
+ (1 << (branch->writeout_depth + branch->writeout_stencil)) - 1 :
+ 0xF;
+
+ if (!bad_writeout && writeout_mask != full_mask) {
predicate.unit = UNIT_VMUL;
predicate.dest = src;
- predicate.mask = writeout_mask ^ 0xF;
+ predicate.mask = writeout_mask ^ full_mask;
struct midgard_instruction *peaked =
mir_choose_instruction(instructions, worklist, len, &predicate);
vmul = peaked;
vmul->unit = UNIT_VMUL;
writeout_mask |= predicate.mask;
- assert(writeout_mask == 0xF);
+ assert(writeout_mask == full_mask);
}
/* Cleanup */
}
/* Finally, add a move if necessary */
- if (bad_writeout || writeout_mask != 0xF) {
- unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : make_compiler_temp(ctx);
+ if (bad_writeout || writeout_mask != full_mask) {
+ unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(zs_writeout ? 1 : 0) : make_compiler_temp(ctx);
vmul = ralloc(ctx, midgard_instruction);
*vmul = v_mov(src, temp);
vmul->unit = UNIT_VMUL;
- vmul->mask = 0xF ^ writeout_mask;
+ vmul->mask = full_mask ^ writeout_mask;
/* Rewrite to use our temp */
uint16_t
mir_bytemask_of_read_components_index(midgard_instruction *ins, unsigned i)
{
- /* Branch writeout uses all components */
- if (ins->compact_branch && ins->writeout && (i == 0))
- return 0xFFFF;
+ if (ins->compact_branch && ins->writeout && (i == 0)) {
+ /* Non-ZS writeout uses all components */
+ if (!ins->writeout_depth && !ins->writeout_stencil)
+ return 0xFFFF;
+
+ /* For ZS-writeout, if both Z and S are written we need two
+ * 32-bit components (bytemask 0xFF); otherwise a single
+ * component (bytemask 0xF) suffices.
+ */
+ if (ins->writeout_depth && ins->writeout_stencil)
+ return 0xFF;
+ else
+ return 0xF;
+ }
/* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */
if (ins->compact_branch && ins->branch.conditional && (i == 0))