freedreno/a6xx: Only use merged regs and four quads for VS+FS
author Kristian H. Kristensen <hoegsberg@google.com>
Tue, 29 Oct 2019 23:26:34 +0000 (16:26 -0700)
committer Kristian H. Kristensen <hoegsberg@google.com>
Fri, 8 Nov 2019 00:40:27 +0000 (16:40 -0800)
When other geometry stages are present, we choose two quads and no
merged regs.

Acked-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Kristian H. Kristensen <hoegsberg@google.com>
Reviewed-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/a6xx/fd6_program.c

index 5e54c7ea6d5be41b43b4b8841e2524f913b5328f..213db91ff1ca3d5c32b8f90613083825b2512e88 100644 (file)
@@ -410,13 +410,26 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                         A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(smask_regid) |
                         0xfc000000);
 
+       enum a3xx_threadsize vssz;
+       uint32_t vsregs;
+       if (ds || hs) {
+               vssz = TWO_QUADS;
+               vsregs = 0;
+       } else {
+               vssz = FOUR_QUADS;
+               vsregs = A6XX_SP_VS_CTRL_REG0_MERGEDREGS;
+       }
+
        OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1);
-       OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(fssz) |
+       OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(vssz) |
                        A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) |
-                       A6XX_SP_VS_CTRL_REG0_MERGEDREGS |
+                       vsregs |
                        A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack) |
                        COND(vs->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
 
+       fd6_emit_shader(ring, vs);
+       ir3_emit_immediates(screen, vs, ring);
+
        struct ir3_shader_linkage l = {0};
        const struct ir3_shader_variant *last_shader = fd6_last_shader(state);
        ir3_link_shaders(&l, last_shader, fs);
@@ -497,9 +510,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                OUT_RING(ring, reg);
        }
 
-       fd6_emit_shader(ring, vs);
-       ir3_emit_immediates(screen, vs, ring);
-
        if (hs) {
                OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1);
                OUT_RING(ring, A6XX_SP_HS_CTRL_REG0_THREADSIZE(TWO_QUADS) |