freedreno/a3xx+a4xx: fix for stk binning pass hang
author Rob Clark <robclark@freedesktop.org>
Mon, 16 Nov 2015 20:07:29 +0000 (15:07 -0500)
committer Rob Clark <robclark@freedesktop.org>
Wed, 18 Nov 2015 19:31:13 +0000 (14:31 -0500)
We'd end up in a state where the shader uses no inputs, yet num_elements is
greater than zero.  Triggered by a transform-feedback (TF) vertex shader which did:

  gl_Position = vec4(0.0, 0.0, 0.0, 0.0);

resulting in a binning pass variant with no inputs.

Includes the equivalent fix for a4xx, even though we don't have the binning
pass enabled yet on a4xx.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a4xx/fd4_emit.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index 8f9c8b0623cb798c02d747f581cb5341e2df915a..25ea3e7a7b72a4ca7bca1bd10952f9f57947c3f9 100644 (file)
@@ -350,7 +350,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
        unsigned instance_regid = regid(63, 0);
        unsigned vtxcnt_regid = regid(63, 0);
 
+       /* Note that sysvals come *after* normal inputs: */
        for (i = 0; i < vp->inputs_count; i++) {
+               if (!vp->inputs[i].compmask)
+                       continue;
                if (vp->inputs[i].sysval) {
                        switch(vp->inputs[i].slot) {
                        case SYSTEM_VALUE_BASE_VERTEX:
@@ -369,18 +372,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
                                unreachable("invalid system value");
                                break;
                        }
-               } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
+               } else if (i < vtx->vtx->num_elements) {
                        last = i;
                }
        }
 
-       /* hw doesn't like to be configured for zero vbo's, it seems: */
-       if ((vtx->vtx->num_elements == 0) &&
-                       (vertex_regid == regid(63, 0)) &&
-                       (instance_regid == regid(63, 0)) &&
-                       (vtxcnt_regid == regid(63, 0)))
-               return;
-
        for (i = 0, j = 0; i <= last; i++) {
                assert(!vp->inputs[i].sysval);
                if (vp->inputs[i].compmask) {
@@ -424,6 +420,38 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
                }
        }
 
+       /* hw doesn't like to be configured for zero vbo's, it seems: */
+       if (last < 0) {
+               /* just recycle the shader bo, we just need to point to *something*
+                * valid:
+                */
+               struct fd_bo *dummy_vbo = vp->bo;
+               bool switchnext = (vertex_regid != regid(63, 0)) ||
+                               (instance_regid != regid(63, 0)) ||
+                               (vtxcnt_regid != regid(63, 0));
+
+               OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2);
+               OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+                               A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+                               COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+                               A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) |
+                               A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+               OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+
+               OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1);
+               OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+                               A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+                               A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) |
+                               A3XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+                               A3XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
+                               A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+                               A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+                               COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+               total_in = 1;
+               j = 1;
+       }
+
        OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
        OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
                        A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
index 26b58718cd8bc72673e9ffd55432cb9be18ee353..5a7b192f79d693f7aa3f12d3f2ad160929c23290 100644 (file)
@@ -332,7 +332,10 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
        unsigned instance_regid = regid(63, 0);
        unsigned vtxcnt_regid = regid(63, 0);
 
+       /* Note that sysvals come *after* normal inputs: */
        for (i = 0; i < vp->inputs_count; i++) {
+               if (!vp->inputs[i].compmask)
+                       continue;
                if (vp->inputs[i].sysval) {
                        switch(vp->inputs[i].slot) {
                        case SYSTEM_VALUE_BASE_VERTEX:
@@ -351,19 +354,11 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
                                unreachable("invalid system value");
                                break;
                        }
-               } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
+               } else if (i < vtx->vtx->num_elements) {
                        last = i;
                }
        }
 
-
-       /* hw doesn't like to be configured for zero vbo's, it seems: */
-       if ((vtx->vtx->num_elements == 0) &&
-                       (vertex_regid == regid(63, 0)) &&
-                       (instance_regid == regid(63, 0)) &&
-                       (vtxcnt_regid == regid(63, 0)))
-               return;
-
        for (i = 0, j = 0; i <= last; i++) {
                assert(!vp->inputs[i].sysval);
                if (vp->inputs[i].compmask) {
@@ -408,6 +403,38 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
                }
        }
 
+       /* hw doesn't like to be configured for zero vbo's, it seems: */
+       if (last < 0) {
+               /* just recycle the shader bo, we just need to point to *something*
+                * valid:
+                */
+               struct fd_bo *dummy_vbo = vp->bo;
+               bool switchnext = (vertex_regid != regid(63, 0)) ||
+                               (instance_regid != regid(63, 0)) ||
+                               (vtxcnt_regid != regid(63, 0));
+
+               OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4);
+               OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+                               A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+                               COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
+               OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+               OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1));
+               OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1));
+
+               OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1);
+               OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
+                               A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+                               A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) |
+                               A4XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+                               A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
+                               A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+                               A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+                               COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+               total_in = 1;
+               j = 1;
+       }
+
        OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
        OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
                        0xa0000 | /* XXX */
index 7e2c27d976517a4b0b2e7b9990d0620eff900863..5d1cccb0daa712e5b91cb295860f455d865d68bc 100644 (file)
@@ -166,7 +166,9 @@ struct ir3_shader_variant {
        } outputs[16 + 2];  /* +POSITION +PSIZE */
        bool writes_pos, writes_psize;
 
-       /* vertices/inputs: */
+       /* attributes (VS) / varyings (FS):
+        * Note that sysval's should come *after* normal inputs.
+        */
        unsigned inputs_count;
        struct {
                uint8_t slot;
@@ -229,7 +231,7 @@ struct ir3_shader {
 
        struct ir3_compiler *compiler;
 
-       struct pipe_context *pctx;
+       struct pipe_context *pctx;    /* TODO replace w/ pipe_screen */
        const struct tgsi_token *tokens;
        struct pipe_stream_output_info stream_output;