freedreno/ir3: Fix duplicated fine derivatives instructions.
author: Eric Anholt <eric@anholt.net>
Tue, 30 Jun 2020 19:17:10 +0000 (12:17 -0700)
committer: Marge Bot <eric+marge@anholt.net>
Sat, 18 Jul 2020 00:43:44 +0000 (00:43 +0000)
legalize_block() can get run multiple times, which I didn't notice when
adding fine derivs support.  Other instruction clones change things such
that the legalization won't trigger again, but that didn't apply to the
DS.PP legalization.  To keep someone else from tripping over this, split
the one-shot legalization out of the iterative sync flag application.

Fixes failures in dEQP-VK.glsl.derivate.dfdxfine.*

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3198
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5699>

.gitlab-ci/deqp-freedreno-a630-fails.txt
src/freedreno/ir3/disasm-a3xx.c
src/freedreno/ir3/instr-a3xx.h
src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_legalize.c

index daf0960d63ba85a4eba04c902f760ef5c3eb2f2f..274cfd6239aceaf37c7ecec368230f752718bda8 100644 (file)
@@ -5,7 +5,6 @@ dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_clear
 dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_draw
 dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithigh.imglimithigh.noiub.uab.frag.ialimitlow.0
 dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3
-dEQP-VK.glsl.derivate.fwidthfine.uniform_loop.vec3_mediump
 dEQP-VK.glsl.linkage.varying.struct.mat3x2
 dEQP-VK.graphicsfuzz.mat-array-deep-control-flow
 dEQP-VK.pipeline.spec_constant.graphics.geometry.composite.array.array_vec4
index de8387f3bcafe470c2d41825a37c613240388271..6cd2789fcd1ed4deef48c483cd93d05e30c61e4f 100644 (file)
@@ -1263,6 +1263,9 @@ static const struct opc_info {
        OPC(5, OPC_DSYPP_1,      dsypp.1),
        OPC(5, OPC_RGETPOS,      rgetpos),
        OPC(5, OPC_RGETINFO,     rgetinfo),
+       /* macros are needed here for ir3_print */
+       OPC(5, OPC_DSXPP_MACRO,  dsxpp.macro),
+       OPC(5, OPC_DSYPP_MACRO,  dsypp.macro),
 
 
        /* category 6: */
index 822d9dd7bc1cb90a934279ade1e16f6f7426450e..d2715080060a123493da61ba78abf48007529fe5 100644 (file)
@@ -185,6 +185,9 @@ typedef enum {
        OPC_DSYPP_1         = _OPC(5, 25),
        OPC_RGETPOS         = _OPC(5, 26),
        OPC_RGETINFO        = _OPC(5, 27),
+       /* cat5 meta instructions, placed above the cat5 opc field's size */
+       OPC_DSXPP_MACRO     = _OPC(5, 32),
+       OPC_DSYPP_MACRO     = _OPC(5, 33),
 
        /* category 6: */
        OPC_LDG             = _OPC(6, 0),        /* load-global */
index 634089c590bbe271939717f76f9cc8b4b3c634c6..a198e4cb92705ca53b976f7cc282bbd4b05228e0 100644 (file)
@@ -1663,9 +1663,9 @@ INSTR1(SQRT)
 
 /* cat5 instructions: */
 INSTR1(DSX)
-INSTR1(DSXPP_1)
+INSTR1(DSXPP_MACRO)
 INSTR1(DSY)
-INSTR1(DSYPP_1)
+INSTR1(DSYPP_MACRO)
 INSTR1F(3D, DSX)
 INSTR1F(3D, DSY)
 INSTR1(RGETPOS)
index acfe4341ccd99fa528242c63bdeadb43c798756f..10a51eb55e7a4a410c5e2326c48c82350e34e1ca 100644 (file)
@@ -478,7 +478,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
                dst[0]->cat5.type = TYPE_F32;
                break;
        case nir_op_fddx_fine:
-               dst[0] = ir3_DSXPP_1(b, src[0], 0);
+               dst[0] = ir3_DSXPP_MACRO(b, src[0], 0);
                dst[0]->cat5.type = TYPE_F32;
                break;
        case nir_op_fddy:
@@ -488,7 +488,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
                break;
                break;
        case nir_op_fddy_fine:
-               dst[0] = ir3_DSYPP_1(b, src[0], 0);
+               dst[0] = ir3_DSYPP_MACRO(b, src[0], 0);
                dst[0]->cat5.type = TYPE_F32;
                break;
        case nir_op_flt:
index c4d90c15ba94554d66b2b47e8ac0f923423438e9..69efdbabc3f1828854d0b7260c447a4c8b944675 100644 (file)
@@ -233,13 +233,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                        list_addtail(&n->node, &block->instr_list);
                }
 
-               if (n->opc == OPC_DSXPP_1 || n->opc == OPC_DSYPP_1) {
-                       struct ir3_instruction *op_p = ir3_instr_clone(n);
-                       op_p->flags = IR3_INSTR_P;
-
-                       ctx->so->need_fine_derivatives = true;
-               }
-
                if (is_sfu(n))
                        regmask_set(&state->needs_ss, n->regs[0]);
 
@@ -360,6 +353,42 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
        return true;
 }
 
+/* Expands dsxpp and dsypp macros to:
+ *
+ * dsxpp.1 dst, src
+ * dsxpp.1.p dst, src
+ *
+ * We apply this after flags syncing, as we don't want to sync in between the
+ * two (which might happen if dst == src).  We do it before nop scheduling
+ * because that needs to count actual instructions.
+ */
+static bool
+apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
+{
+       struct list_head instr_list;
+
+       /* remove all the instructions from the list, we'll be adding
+        * them back in as we go
+        */
+       list_replace(&block->instr_list, &instr_list);
+       list_inithead(&block->instr_list);
+
+       foreach_instr_safe (n, &instr_list) {
+               list_addtail(&n->node, &block->instr_list);
+
+               if (n->opc == OPC_DSXPP_MACRO || n->opc == OPC_DSYPP_MACRO) {
+                       n->opc = (n->opc == OPC_DSXPP_MACRO) ? OPC_DSXPP_1 : OPC_DSYPP_1;
+
+                       struct ir3_instruction *op_p = ir3_instr_clone(n);
+                       op_p->flags = IR3_INSTR_P;
+
+                       ctx->so->need_fine_derivatives = true;
+               }
+       }
+
+       return true;
+}
+
 /* NOTE: branch instructions are always the last instruction(s)
  * in the block.  We take advantage of this as we resolve the
  * branches, since "if (foo) break;" constructs turn into
@@ -752,6 +781,11 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
        block_sched(ir);
        if (so->type == MESA_SHADER_FRAGMENT)
                kill_sched(ir, so);
+
+       foreach_block (block, &ir->block_list) {
+               progress |= apply_fine_deriv_macro(ctx, block);
+       }
+
        nop_sched(ir);
 
        do {