freedreno/a3xx: fix TOTALATTRTOVS
author Rob Clark <robclark@freedesktop.org>
Tue, 22 Apr 2014 19:43:51 +0000 (15:43 -0400)
committer Rob Clark <robclark@freedesktop.org>
Wed, 23 Apr 2014 11:32:16 +0000 (07:32 -0400)
In cases where varying fetches are optimized away (just pass-through in
the vertex shader, but unused in the fragment shader) we need to calculate
the correct TOTALATTRTOVS based on the actual number of varyings fetched;
otherwise the GPU locks up.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a3xx/fd3_program.h

index fd385d77024e5372d2f947fb5f9d2655a911af91..4f8dcc5fe6116698feabe832ce22e3ffe75b12cb 100644 (file)
@@ -2317,12 +2317,14 @@ fd3_compile_shader(struct fd3_shader_variant *so,
        actual_in = 0;
        for (i = 0; i < so->inputs_count; i++) {
                unsigned j, regid = ~0, compmask = 0;
+               so->inputs[i].ncomp = 0;
                for (j = 0; j < 4; j++) {
                        struct ir3_instruction *in = inputs[(i*4) + j];
                        if (in) {
                                compmask |= (1 << j);
                                regid = in->regs[0]->num - j;
                                actual_in++;
+                               so->inputs[i].ncomp++;
                        }
                }
                so->inputs[i].regid = regid;
index ee58591fffcb9451cbe24887232b4e7c58e1aa9d..ddb69243c11cc86f814932c6d077dab8133f4c55 100644 (file)
@@ -1326,6 +1326,7 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
 
                so->inputs[n].semantic = decl_semantic(&decl->Semantic);
                so->inputs[n].compmask = (1 << ncomp) - 1;
+               so->inputs[n].ncomp = ncomp;
                so->inputs[n].regid = r;
                so->inputs[n].inloc = ctx->next_inloc;
                so->inputs[n].bary = true;   /* all that is supported */
index b1cf3fd131a16b543b0a174216ad239e698b9214..c78d5e83a936a69c0764ebba2b6a0fe33d612312 100644 (file)
@@ -311,6 +311,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
                struct fd3_vertex_buf *vbufs, uint32_t n)
 {
        uint32_t i, j, last = 0;
+       uint32_t total_in = 0;
 
        n = MIN2(n, vp->inputs_count);
 
@@ -343,12 +344,13 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
                                        A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
                                        COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
 
+                       total_in += vp->inputs[i].ncomp;
                        j++;
                }
        }
 
        OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
-       OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |
+       OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
                        A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
                        A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
                        A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
index e0866c1d008a18da1a11fc651a71d1918b7569d0..0439d39dbffe68af818d41e38c5641c47f397a26 100644 (file)
@@ -100,6 +100,7 @@ struct fd3_shader_variant {
                fd3_semantic semantic;
                uint8_t regid;
                uint8_t compmask;
+               uint8_t ncomp;
                /* in theory inloc of fs should match outloc of vs: */
                uint8_t inloc;
                uint8_t bary;