freedreno/ir3: track # of samplers used by shader
author: Rob Clark <robdclark@gmail.com>
Tue, 2 Oct 2018 20:04:39 +0000 (16:04 -0400)
committer: Rob Clark <robdclark@gmail.com>
Wed, 17 Oct 2018 16:44:48 +0000 (12:44 -0400)
This is useful for a6xx, where it keeps program state from depending on
the currently bound tex/samp state: the sampler count is taken from the
compiled shader variant instead.

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/a3xx/fd3_program.c
src/gallium/drivers/freedreno/a4xx/fd4_program.c
src/gallium/drivers/freedreno/a5xx/fd5_program.c
src/gallium/drivers/freedreno/a6xx/fd6_program.c
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
src/gallium/drivers/freedreno/ir3/ir3_legalize.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index ac2f76b823a2be368d45d06a967e5b32fa84cf57..9d5c7b661fdb741f6cdd12ab2b2ca60f99f148ab 100644 (file)
@@ -334,7 +334,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
                                A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP |
                                A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
                                A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
-                               COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
+                               COND(fp->num_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
                                A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
                OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
                                A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
index dcdee3b90db6c522cdb4bf2953f5dd409df46ab2..860f615e31d78a9630e2335c6543d755cc8cf2d3 100644 (file)
@@ -318,7 +318,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
                        A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
                        A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
                        A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
-                       COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+                       COND(s[VS].v->num_samp > 0, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE));
        OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) |
                        A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in));
        OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
@@ -393,7 +393,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
                                A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
                                A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
                                A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
-                               COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+                               COND(s[FS].v->num_samp > 0, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
                OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
                                0x80000000 |      /* XXX */
                                COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) |
index ba3339fb1d2365299df9dd06084c023ebb58ab48..2a6e3334aed7e77feaa12efa21f6a0c185cfaad8 100644 (file)
@@ -442,7 +442,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
                        0x6 | /* XXX seems to be always set? */
                        A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
-                       COND(s[VS].v->has_samp, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+                       COND(s[VS].v->num_samp > 0, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
 
        struct ir3_shader_linkage l = {0};
        ir3_link_shaders(&l, s[VS].v, s[FS].v);
@@ -566,7 +566,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
                        A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
                        A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
-                       COND(s[FS].v->has_samp, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+                       COND(s[FS].v->num_samp > 0, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
 
        OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
        OUT_RING(ring, 0x020fffff);        /* XXX */
index cbb561a3b8dd1dceb1db86917979bddb2c125f39..c6d062a3a9a9c92d97badca37199c4e91e0a2062 100644 (file)
@@ -334,14 +334,10 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
         * emitted if frag-prog is dirty vs if vert-prog is dirty..
         */
 
-       OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1);
-       OUT_RING(ring, 0);
-
-       struct fd_texture_stateobj *tex = &ctx->tex[PIPE_SHADER_VERTEX];
        OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 2);
        OUT_RING(ring, COND(s[VS].v, A6XX_SP_VS_CONFIG_ENABLED) |
-                        A6XX_SP_VS_CONFIG_NTEX(tex->num_textures) |
-                        A6XX_SP_VS_CONFIG_NSAMP(tex->num_samplers));     /* SP_VS_CONFIG */
+                        A6XX_SP_VS_CONFIG_NTEX(s[VS].v->num_samp) |
+                        A6XX_SP_VS_CONFIG_NSAMP(s[VS].v->num_samp));     /* SP_VS_CONFIG */
        OUT_RING(ring, s[VS].instrlen);                                                   /* SP_VS_INSTRLEN */
 
        OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1);
@@ -371,12 +367,10 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
        OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_AB00, 1);
        OUT_RING(ring, 0x5);
 
-       tex = &ctx->tex[PIPE_SHADER_FRAGMENT];
        OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 2);
        OUT_RING(ring, COND(s[FS].v, A6XX_SP_FS_CONFIG_ENABLED) |
-                        A6XX_SP_FS_CONFIG_NTEX(tex->num_textures) |
-                        A6XX_SP_FS_CONFIG_NSAMP(tex->num_samplers));
-                                                                                                                       /* SP_FS_CONFIG */
+                        A6XX_SP_FS_CONFIG_NTEX(s[FS].v->num_samp) |
+                        A6XX_SP_FS_CONFIG_NSAMP(s[FS].v->num_samp));     /* SP_FS_CONFIG */
        OUT_RING(ring, s[FS].instrlen);                                                   /* SP_FS_INSTRLEN */
 
        OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4);
@@ -393,7 +387,7 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
                        A6XX_SP_VS_CTRL_REG0_MERGEDREGS |
                        A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
-                       COND(s[VS].v->has_samp, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+                       COND(s[VS].v->num_samp > 0, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
 
        struct ir3_shader_linkage l = {0};
        ir3_link_shaders(&l, s[VS].v, s[FS].v);
@@ -517,7 +511,7 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
                        A6XX_SP_FS_CTRL_REG0_MERGEDREGS |
                        A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
-                       COND(s[FS].v->has_samp, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+                       COND(s[FS].v->num_samp > 0, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
 
        OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1);
        OUT_RING(ring, 0);        /* XXX */
index 63215cefc96a6a8fa07867c78b37c8df10b6bf37..3055c10f1ddb96fdda1b0494cce5d9981020ca5e 100644 (file)
@@ -1005,7 +1005,7 @@ int ir3_ra(struct ir3 *ir3, enum shader_t type,
                bool frag_coord, bool frag_face);
 
 /* legalize: */
-void ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary);
+void ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary);
 
 /* ************************************************************************* */
 /* instruction helpers */
index 14dcb3502e8a4cae7c3d6eb1d2171569a304e37e..9f6a06cf33326e982005e602fd781ec084b3f5d8 100644 (file)
@@ -3760,7 +3760,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
        /* We need to do legalize after (for frag shader's) the "bary.f"
         * offsets (inloc) have been assigned.
         */
-       ir3_legalize(ir, &so->has_samp, &so->has_ssbo, &max_bary);
+       ir3_legalize(ir, &so->num_samp, &so->has_ssbo, &max_bary);
 
        if (fd_mesa_debug & FD_DBG_OPTMSGS) {
                printf("AFTER LEGALIZE:\n");
index 8c012106bf8ed5884f61ed97eca5350ffba43128..18bdc6ceb1f234cc748e2d91792f87da8ecf0d70 100644 (file)
@@ -40,7 +40,7 @@
  */
 
 struct ir3_legalize_ctx {
-       bool has_samp;
+       int num_samp;
        bool has_ssbo;
        int max_bary;
 };
@@ -214,7 +214,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                         * the samp instruction(s) could get eliminated if the
                         * result is not used.
                         */
-                       ctx->has_samp = true;
+                       ctx->num_samp = MAX2(ctx->num_samp, n->cat5.samp + 1);
                        regmask_set(&state->needs_sy, n->regs[0]);
                } else if (n->opc == OPC_RESINFO) {
                        regmask_set(&state->needs_ss, n->regs[0]);
@@ -463,7 +463,7 @@ mark_convergence_points(struct ir3 *ir)
 }
 
 void
-ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary)
+ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary)
 {
        struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx);
        bool progress;
@@ -483,7 +483,7 @@ ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary)
                }
        } while (progress);
 
-       *has_samp = ctx->has_samp;
+       *num_samp = ctx->num_samp;
        *has_ssbo = ctx->has_ssbo;
        *max_bary = ctx->max_bary;
 
index b237f8a08fece36f7f4ff1e732001d5b3ef6499f..6bc24f47d75e870ba09a8437b0a111660dc1f31b 100644 (file)
@@ -298,8 +298,8 @@ struct ir3_shader_variant {
         */
        unsigned varying_in;
 
-       /* do we have one or more texture sample instructions: */
-       bool has_samp;
+       /* number of samplers/textures (which are currently 1:1): */
+       int num_samp;
 
        /* do we have one or more SSBO instructions: */
        bool has_ssbo;