freedreno/ir3: improve handling of aliased inputs
author: Jonathan Marek <jonathan@marek.ca>
Thu, 13 Aug 2020 01:57:15 +0000 (21:57 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Tue, 1 Sep 2020 15:10:47 +0000 (15:10 +0000)
This allows overlapping inputs. That is required by the next patch, which
lets setup_input be called multiple times for each input.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6181>

.gitlab-ci/deqp-freedreno-a307-fails.txt
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_context.c

index 68d6f1ee9680da24634c5f1ae9c846b2aa07dde1..bb0bbb938a52298e5d40c05d675c678b53953b2d 100644 (file)
@@ -475,7 +475,6 @@ dEQP-GLES3.functional.transform_feedback.random.interleaved.lines.8
 dEQP-GLES3.functional.transform_feedback.random.interleaved.lines.9
 dEQP-GLES3.functional.transform_feedback.random.interleaved.triangles.1
 dEQP-GLES3.functional.transform_feedback.random.interleaved.triangles.3
 dEQP-GLES3.functional.transform_feedback.random.interleaved.lines.9
 dEQP-GLES3.functional.transform_feedback.random.interleaved.triangles.1
 dEQP-GLES3.functional.transform_feedback.random.interleaved.triangles.3
-dEQP-GLES3.functional.transform_feedback.random.interleaved.triangles.8
 dEQP-GLES3.functional.transform_feedback.random.separate.lines.10
 dEQP-GLES3.functional.transform_feedback.random.separate.lines.2
 dEQP-GLES3.functional.transform_feedback.random.separate.lines.4
 dEQP-GLES3.functional.transform_feedback.random.separate.lines.10
 dEQP-GLES3.functional.transform_feedback.random.separate.lines.2
 dEQP-GLES3.functional.transform_feedback.random.separate.lines.4
index d7999ec2831dc9340ca5672b8efcdb93798af886..ddd9bf4a4f5a5f3364bb41456396503b7ef819e6 100644 (file)
@@ -2956,6 +2956,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
        unsigned n = in->data.driver_location;
        unsigned frac = in->data.location_frac;
        unsigned slot = in->data.location;
        unsigned n = in->data.driver_location;
        unsigned frac = in->data.location_frac;
        unsigned slot = in->data.location;
+       unsigned compmask;
 
        /* Inputs are loaded using ldlw or ldg for these stages. */
        if (ctx->so->type == MESA_SHADER_TESS_CTRL ||
 
        /* Inputs are loaded using ldlw or ldg for these stages. */
        if (ctx->so->type == MESA_SHADER_TESS_CTRL ||
@@ -2970,8 +2971,18 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
        if (ncomp > 4)
                return;
 
        if (ncomp > 4)
                return;
 
+       if (ctx->so->type == MESA_SHADER_FRAGMENT)
+               compmask = BITFIELD_MASK(ncomp) << frac;
+       else
+               compmask = BITFIELD_MASK(ncomp + frac);
+
+       /* remove any already set components */
+       compmask &= ~so->inputs[n].compmask;
+       if (!compmask)
+               return;
+
        so->inputs[n].slot = slot;
        so->inputs[n].slot = slot;
-       so->inputs[n].compmask |= (1 << (ncomp + frac)) - 1;
+       so->inputs[n].compmask |= compmask;
        so->inputs_count = MAX2(so->inputs_count, n + 1);
        so->inputs[n].interpolate = in->data.interpolation;
 
        so->inputs_count = MAX2(so->inputs_count, n + 1);
        so->inputs[n].interpolate = in->data.interpolation;
 
@@ -2986,6 +2997,9 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
                        struct ir3_instruction *instr = NULL;
                        unsigned idx = (n * 4) + i + frac;
 
                        struct ir3_instruction *instr = NULL;
                        unsigned idx = (n * 4) + i + frac;
 
+                       if (!(compmask & (1 << (i + frac))))
+                               continue;
+
                        if (slot == VARYING_SLOT_POS) {
                                ir3_context_error(ctx, "fragcoord should be a sysval!\n");
                        } else {
                        if (slot == VARYING_SLOT_POS) {
                                ir3_context_error(ctx, "fragcoord should be a sysval!\n");
                        } else {
@@ -3017,17 +3031,11 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
                                instr = create_frag_input(ctx, so->inputs[n].use_ldlv, idx);
                        }
 
                                instr = create_frag_input(ctx, so->inputs[n].use_ldlv, idx);
                        }
 
-                       compile_assert(ctx, idx < ctx->ninputs);
-
+                       compile_assert(ctx, idx < ctx->ninputs && !ctx->inputs[idx]);
                        ctx->inputs[idx] = instr;
                }
        } else if (ctx->so->type == MESA_SHADER_VERTEX) {
                struct ir3_instruction *input = NULL;
                        ctx->inputs[idx] = instr;
                }
        } else if (ctx->so->type == MESA_SHADER_VERTEX) {
                struct ir3_instruction *input = NULL;
-               struct ir3_instruction *components[4];
-               /* input as setup as frac=0 with "ncomp + frac" components,
-                * this avoids getting a sparse writemask
-                */
-               unsigned mask = (1 << (ncomp + frac)) - 1;
 
                foreach_input (in, ctx->ir) {
                        if (in->input.inidx == n) {
 
                foreach_input (in, ctx->ir) {
                        if (in->input.inidx == n) {
@@ -3037,57 +3045,32 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
                }
 
                if (!input) {
                }
 
                if (!input) {
-                       input = create_input(ctx, mask);
+                       input = create_input(ctx, compmask);
                        input->input.inidx = n;
                } else {
                        /* For aliased inputs, just append to the wrmask.. ie. if we
                         * first see a vec2 index at slot N, and then later a vec4,
                         * the wrmask of the resulting overlapped vec2 and vec4 is 0xf
                        input->input.inidx = n;
                } else {
                        /* For aliased inputs, just append to the wrmask.. ie. if we
                         * first see a vec2 index at slot N, and then later a vec4,
                         * the wrmask of the resulting overlapped vec2 and vec4 is 0xf
-                        *
-                        * If the new input that aliases a previously processed input
-                        * sets no new bits, then just bail as there is nothing to see
-                        * here.
                         */
                         */
-                       if (!(mask & ~input->regs[0]->wrmask))
-                               return;
-                       input->regs[0]->wrmask |= mask;
+                       input->regs[0]->wrmask |= compmask;
                }
 
                }
 
-               ir3_split_dest(ctx->block, components, input, 0, ncomp + frac);
-
                for (int i = 0; i < ncomp + frac; i++) {
                        unsigned idx = (n * 4) + i;
                        compile_assert(ctx, idx < ctx->ninputs);
 
                for (int i = 0; i < ncomp + frac; i++) {
                        unsigned idx = (n * 4) + i;
                        compile_assert(ctx, idx < ctx->ninputs);
 
-                       /* With aliased inputs, since we add to the wrmask above, we
-                        * can end up with stale meta:split instructions in the inputs
-                        * table.  This is basically harmless, since eventually they
-                        * will get swept away by DCE, but the mismatch wrmask (since
-                        * they would be using the previous wrmask before we OR'd in
-                        * more bits) angers ir3_validate.  So just preemptively clean
-                        * them up.  See:
-                        *
-                        * dEQP-GLES2.functional.attribute_location.bind_aliasing.cond_vec2
-                        *
-                        * Note however that split_dest() will return the src if it is
-                        * scalar, so the previous ctx->inputs[idx] could be the input
-                        * itself (which we don't want to remove)
-                        */
+                       /* fixup the src wrmask to avoid validation fail */
                        if (ctx->inputs[idx] && (ctx->inputs[idx] != input)) {
                        if (ctx->inputs[idx] && (ctx->inputs[idx] != input)) {
-                               list_del(&ctx->inputs[idx]->node);
+                               ctx->inputs[idx]->regs[1]->wrmask = input->regs[0]->wrmask;
+                               continue;
                        }
 
                        }
 
-                       ctx->inputs[idx] = components[i];
+                       ir3_split_dest(ctx->block, &ctx->inputs[idx], input, i, 1);
                }
                }
-       } else {
-               ir3_context_error(ctx, "unknown shader type: %d\n", ctx->so->type);
        }
 
        }
 
-       /* note: this can be wrong for sparse vertex inputs, this happens with
-        * vulkan, only a3xx/a4xx use this value for VS, so it shouldn't matter
-        */
        if (so->inputs[n].bary || (ctx->so->type == MESA_SHADER_VERTEX)) {
        if (so->inputs[n].bary || (ctx->so->type == MESA_SHADER_VERTEX)) {
-               so->total_in += ncomp;
+               so->total_in += util_bitcount(compmask);
        }
 }
 
        }
 }
 
index 2d1ed21d9e37ee84d2589e34db64b7dbf0da65a3..e22fef1e68be64a95b9b1ffe01ab2050c731ffeb 100644 (file)
@@ -360,9 +360,12 @@ ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
 {
        struct ir3_instruction *prev = NULL;
 
 {
        struct ir3_instruction *prev = NULL;
 
-       if ((n == 1) && (src->regs[0]->wrmask == 0x1)) {
+       if ((n == 1) && (src->regs[0]->wrmask == 0x1) &&
+               /* setup_input needs ir3_split_dest to generate a SPLIT instruction */
+               src->opc != OPC_META_INPUT) {
                dst[0] = src;
                return;
                dst[0] = src;
                return;
+
        }
 
        if (src->opc == OPC_META_COLLECT) {
        }
 
        if (src->opc == OPC_META_COLLECT) {