freedreno/ir3: Fix duplicated fine derivatives instructions.

author Eric Anholt <eric@anholt.net>

Tue, 30 Jun 2020 19:17:10 +0000 (12:17 -0700)

committer Marge Bot <eric+marge@anholt.net>

Sat, 18 Jul 2020 00:43:44 +0000 (00:43 +0000)
author Eric Anholt <eric@anholt.net>
Tue, 30 Jun 2020 19:17:10 +0000 (12:17 -0700)
committer Marge Bot <eric+marge@anholt.net>
Sat, 18 Jul 2020 00:43:44 +0000 (00:43 +0000)
diff --git a/.gitlab-ci/deqp-freedreno-a630-fails.txt b/.gitlab-ci/deqp-freedreno-a630-fails.txt

index daf0960d63ba85a4eba04c902f760ef5c3eb2f2f..274cfd6239aceaf37c7ecec368230f752718bda8 100644 (file)
--- a/.gitlab-ci/deqp-freedreno-a630-fails.txt
+++ b/.gitlab-ci/deqp-freedreno-a630-fails.txt
@@ -5,7 +5,6 @@ dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_clear
  dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_draw
  dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithigh.imglimithigh.noiub.uab.frag.ialimitlow.0
  dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3
-dEQP-VK.glsl.derivate.fwidthfine.uniform_loop.vec3_mediump
  dEQP-VK.glsl.linkage.varying.struct.mat3x2
  dEQP-VK.graphicsfuzz.mat-array-deep-control-flow
  dEQP-VK.pipeline.spec_constant.graphics.geometry.composite.array.array_vec4
diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c

index de8387f3bcafe470c2d41825a37c613240388271..6cd2789fcd1ed4deef48c483cd93d05e30c61e4f 100644 (file)
--- a/src/freedreno/ir3/disasm-a3xx.c
+++ b/src/freedreno/ir3/disasm-a3xx.c
@@ -1263,6 +1263,9 @@ static const struct opc_info {
         OPC(5, OPC_DSYPP_1,      dsypp.1),
         OPC(5, OPC_RGETPOS,      rgetpos),
         OPC(5, OPC_RGETINFO,     rgetinfo),
+       /* macros are needed here for ir3_print */
+       OPC(5, OPC_DSXPP_MACRO,  dsxpp.macro),
+       OPC(5, OPC_DSYPP_MACRO,  dsypp.macro),
  
  
         /* category 6: */
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h

index 822d9dd7bc1cb90a934279ade1e16f6f7426450e..d2715080060a123493da61ba78abf48007529fe5 100644 (file)
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h
@@ -185,6 +185,9 @@ typedef enum {
         OPC_DSYPP_1         = _OPC(5, 25),
         OPC_RGETPOS         = _OPC(5, 26),
         OPC_RGETINFO        = _OPC(5, 27),
+       /* cat5 meta instructions, placed above the cat5 opc field's size */
+       OPC_DSXPP_MACRO     = _OPC(5, 32),
+       OPC_DSYPP_MACRO     = _OPC(5, 33),
  
         /* category 6: */
         OPC_LDG             = _OPC(6, 0),        /* load-global */
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h

index 634089c590bbe271939717f76f9cc8b4b3c634c6..a198e4cb92705ca53b976f7cc282bbd4b05228e0 100644 (file)
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1663,9 +1663,9 @@ INSTR1(SQRT)
  
  /* cat5 instructions: */
  INSTR1(DSX)
-INSTR1(DSXPP_1)
+INSTR1(DSXPP_MACRO)
  INSTR1(DSY)
-INSTR1(DSYPP_1)
+INSTR1(DSYPP_MACRO)
  INSTR1F(3D, DSX)
  INSTR1F(3D, DSY)
  INSTR1(RGETPOS)
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c

index acfe4341ccd99fa528242c63bdeadb43c798756f..10a51eb55e7a4a410c5e2326c48c82350e34e1ca 100644 (file)
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -478,7 +478,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
                 dst[0]->cat5.type = TYPE_F32;
                 break;
         case nir_op_fddx_fine:
-               dst[0] = ir3_DSXPP_1(b, src[0], 0);
+               dst[0] = ir3_DSXPP_MACRO(b, src[0], 0);
                 dst[0]->cat5.type = TYPE_F32;
                 break;
         case nir_op_fddy:
@@ -488,7 +488,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
                 break;
                 break;
         case nir_op_fddy_fine:
-               dst[0] = ir3_DSYPP_1(b, src[0], 0);
+               dst[0] = ir3_DSYPP_MACRO(b, src[0], 0);
                 dst[0]->cat5.type = TYPE_F32;
                 break;
         case nir_op_flt:
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c

index c4d90c15ba94554d66b2b47e8ac0f923423438e9..69efdbabc3f1828854d0b7260c447a4c8b944675 100644 (file)
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -233,13 +233,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                         list_addtail(&n->node, &block->instr_list);
                 }
  
-               if (n->opc == OPC_DSXPP_1 || n->opc == OPC_DSYPP_1) {
-                       struct ir3_instruction *op_p = ir3_instr_clone(n);
-                       op_p->flags = IR3_INSTR_P;
-
-                       ctx->so->need_fine_derivatives = true;
-               }
-
                 if (is_sfu(n))
                         regmask_set(&state->needs_ss, n->regs[0]);
  
@@ -360,6 +353,42 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
         return true;
  }
  
+/* Expands dsxpp and dsypp macros to:
+ *
+ * dsxpp.1 dst, src
+ * dsxpp.1.p dst, src
+ *
+ * We apply this after flags syncing, as we don't want to sync in between the
+ * two (which might happen if dst == src).  We do it before nop scheduling
+ * because that needs to count actual instructions.
+ */
+static bool
+apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
+{
+       struct list_head instr_list;
+
+       /* remove all the instructions from the list, we'll be adding
+        * them back in as we go
+        */
+       list_replace(&block->instr_list, &instr_list);
+       list_inithead(&block->instr_list);
+
+       foreach_instr_safe (n, &instr_list) {
+               list_addtail(&n->node, &block->instr_list);
+
+               if (n->opc == OPC_DSXPP_MACRO || n->opc == OPC_DSYPP_MACRO) {
+                       n->opc = (n->opc == OPC_DSXPP_MACRO) ? OPC_DSXPP_1 : OPC_DSYPP_1;
+
+                       struct ir3_instruction *op_p = ir3_instr_clone(n);
+                       op_p->flags = IR3_INSTR_P;
+
+                       ctx->so->need_fine_derivatives = true;
+               }
+       }
+
+       return true;
+}
+
  /* NOTE: branch instructions are always the last instruction(s)
   * in the block.  We take advantage of this as we resolve the
   * branches, since "if (foo) break;" constructs turn into
@@ -752,6 +781,11 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
         block_sched(ir);
         if (so->type == MESA_SHADER_FRAGMENT)
                 kill_sched(ir, so);
+
+       foreach_block (block, &ir->block_list) {
+               progress |= apply_fine_deriv_macro(ctx, block);
+       }
+
         nop_sched(ir);
  
         do {
author	Eric Anholt <eric@anholt.net>
	Tue, 30 Jun 2020 19:17:10 +0000 (12:17 -0700)
committer	Marge Bot <eric+marge@anholt.net>
	Sat, 18 Jul 2020 00:43:44 +0000 (00:43 +0000)
.gitlab-ci/deqp-freedreno-a630-fails.txt		patch \| blob \| history
src/freedreno/ir3/disasm-a3xx.c		patch \| blob \| history
src/freedreno/ir3/instr-a3xx.h		patch \| blob \| history
src/freedreno/ir3/ir3.h		patch \| blob \| history
src/freedreno/ir3/ir3_compiler_nir.c		patch \| blob \| history
src/freedreno/ir3/ir3_legalize.c		patch \| blob \| history