freedreno/a3xx: deal with optimized tex instructions
author: Rob Clark <robclark@freedesktop.org>
Tue, 8 Apr 2014 18:14:43 +0000 (14:14 -0400)
committer: Rob Clark <robclark@freedesktop.org>
Tue, 8 Apr 2014 20:06:49 +0000 (16:06 -0400)
Keep track of whether we actually have any sam instructions in the
resulting shader, rather than using TGSI SAMP declarations.  If the sam
instruction is optimized out, because the result is not used, we don't
want to emit texture state, etc.  In fact emitting sampler state and/or
setting PIXLODENABLE bit when there are no texture fetches seems to
cause lockup.

In theory this should never happen for a "normal" shader, unless the
state tracker is wonky.  But it is a very real possibility for binning
pass shaders.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a3xx/fd3_program.c
src/gallium/drivers/freedreno/a3xx/fd3_program.h
src/gallium/drivers/freedreno/a3xx/ir3.h
src/gallium/drivers/freedreno/a3xx/ir3_ra.c

index 1d99e5caa9942e9210436da7ee8194f032d3efe4..911330cde2a49b8246760d92287447bfcfbff130 100644 (file)
@@ -2054,12 +2054,6 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
        }
 }
 
-static void
-decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
-{
-       ctx->so->samplers_count++;
-}
-
 /* from TGSI perspective, we actually have inputs.  But most of the "inputs"
  * for a fragment shader are just bary.f instructions.  The *actual* inputs
  * from the hw perspective are the frag_pos and optionally frag_coord and
@@ -2160,8 +2154,6 @@ compile_instructions(struct fd3_compile_context *ctx)
                                decl_out(ctx, decl);
                        } else if (decl->Declaration.File == TGSI_FILE_INPUT) {
                                decl_in(ctx, decl);
-                       } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
-                               decl_samp(ctx, decl);
                        }
                        break;
                }
@@ -2320,7 +2312,7 @@ fd3_compile_shader(struct fd3_shader_variant *so,
        }
 
        ret = ir3_block_ra(block, so->type, key.half_precision,
-                       so->frag_coord, so->frag_face);
+                       so->frag_coord, so->frag_face, &so->has_samp);
        if (ret)
                goto out;
 
index 76de287b16386e435173f02ae1b683e51b81cfbd..ee58591fffcb9451cbe24887232b4e7c58e1aa9d 100644 (file)
@@ -1417,7 +1417,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
 static void
 decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
 {
-       ctx->so->samplers_count++;
+       ctx->so->has_samp = true;
 }
 
 static void
index 00f1014444be46420954908388902adb2f49126b..b1cf3fd131a16b543b0a174216ad239e698b9214 100644 (file)
@@ -177,7 +177,7 @@ emit_textures(struct fd_ringbuffer *ring,
                                CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
                for (i = 0; i < tex->num_samplers; i++) {
                        static const struct fd3_sampler_stateobj dummy_sampler = {};
-                       struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
+                       const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
                                        fd3_sampler_stateobj(tex->samplers[i]) :
                                        &dummy_sampler;
                        OUT_RING(ring, sampler->texsamp0);
@@ -542,11 +542,19 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
        if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX))
                fd_wfi(ctx, ring);
 
-       if (dirty & FD_DIRTY_VERTTEX)
-               emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+       if (dirty & FD_DIRTY_VERTTEX) {
+               if (vp->has_samp)
+                       emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+               else
+                       dirty &= ~FD_DIRTY_VERTTEX;
+       }
 
-       if (dirty & FD_DIRTY_FRAGTEX)
-               emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
+       if (dirty & FD_DIRTY_FRAGTEX) {
+               if (fp->has_samp)
+                       emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
+               else
+                       dirty &= ~FD_DIRTY_FRAGTEX;
+       }
 
        ctx->dirty &= ~dirty;
 }
index 09cadf81cbf2cd508a06c474e96db700c1f1f329..b5544e8c358b14ef9ef0d06f0b7ed59596edeedc 100644 (file)
@@ -120,7 +120,7 @@ create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key)
                        v->inputs_count = 0;
                        v->outputs_count = 0;
                        v->total_in = 0;
-                       v->samplers_count = 0;
+                       v->has_samp = false;
                        v->immediates_count = 0;
                }
        } else {
@@ -397,7 +397,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
                        A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
                        A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
                        A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
-                       COND(vp->samplers_count > 0, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
+                       COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
                        A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen));
        OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
                        A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
@@ -475,7 +475,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
                                A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
                                A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
                                A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
-                               COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
+                               COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
                                A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
                OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
                                A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
index 8d4fd57ae752b602faf8313a2161d80dcaee5453..e0866c1d008a18da1a11fc651a71d1918b7569d0 100644 (file)
@@ -107,8 +107,8 @@ struct fd3_shader_variant {
 
        unsigned total_in;       /* sum of inputs (scalar) */
 
-       /* samplers: */
-       unsigned samplers_count;
+       /* do we have one or more texture sample instructions: */
+       bool has_samp;
 
        /* const reg # of first immediate, ie. 1 == c1
         * (not regid, because TGSI thinks in terms of vec4 registers,
index 09052346992d1143a6d90b0f43bdf18f81444b62..872f47883bbc76f3812041e17fa72f9a123b389c 100644 (file)
@@ -385,8 +385,8 @@ void ir3_block_sched(struct ir3_block *block);
 
 /* register assignment: */
 int ir3_block_ra(struct ir3_block *block, enum shader_t type,
-               bool half_precision, bool frag_coord, bool frag_face);
-
+               bool half_precision, bool frag_coord, bool frag_face,
+               bool *has_samp);
 
 #ifndef ARRAY_SIZE
 #  define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
index 4e48eded2bb91f3e2aae938b486d99d55c98bcb1..57c68c729c52248422e64b1664d08d0fca20b75f 100644 (file)
@@ -56,6 +56,7 @@ struct ir3_ra_ctx {
        bool half_precision;
        bool frag_coord;
        bool frag_face;
+       bool has_samp;
        int cnt;
        bool error;
 };
@@ -654,8 +655,17 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                if (is_sfu(n))
                        regmask_set(&needs_ss, n->regs[0]);
 
-               if (is_tex(n))
+               if (is_tex(n)) {
+                       /* this ends up being the # of samp instructions.. but that
+                        * is ok, everything else only cares whether it is zero or
+                        * not.  We do this here, rather than when we encounter a
+                        * SAMP decl, because (especially in binning pass shader)
+                        * the samp instruction(s) could get eliminated if the
+                        * result is not used.
+                        */
+                       ctx->has_samp = true;
                        regmask_set(&needs_sy, n->regs[0]);
+               }
 
                /* both tex/sfu appear to not always immediately consume
                 * their src register(s):
@@ -730,7 +740,8 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 }
 
 int ir3_block_ra(struct ir3_block *block, enum shader_t type,
-               bool half_precision, bool frag_coord, bool frag_face)
+               bool half_precision, bool frag_coord, bool frag_face,
+               bool *has_samp)
 {
        struct ir3_ra_ctx ctx = {
                        .block = block,
@@ -739,6 +750,11 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type,
                        .frag_coord = frag_coord,
                        .frag_face = frag_face,
        };
+       int ret;
+
        ir3_shader_clear_mark(block->shader);
-       return block_ra(&ctx, block);
+       ret = block_ra(&ctx, block);
+       *has_samp = ctx.has_samp;
+
+       return ret;
 }