freedreno/ir3: optimize shader key comparison
author Rob Clark <robclark@freedesktop.org>
Tue, 14 Oct 2014 20:23:18 +0000 (16:23 -0400)
committer Rob Clark <robclark@freedesktop.org>
Wed, 15 Oct 2014 19:49:48 +0000 (15:49 -0400)
Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/fd3_context.h
src/gallium/drivers/freedreno/a3xx/fd3_draw.c
src/gallium/drivers/freedreno/a3xx/fd3_texture.c
src/gallium/drivers/freedreno/ir3/ir3_shader.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index 324edb2eb808d3b5eb3eb724dd4cca32c21166cc..77e4605e55045030fe9485da216377a80fbadbbf 100644 (file)
@@ -92,6 +92,9 @@ struct fd3_context {
        struct u_upload_mgr *border_color_uploader;
        struct pipe_resource *border_color_buf;
 
+       /* if *any* of bits are set in {v,f}saturate_{s,t,r} */
+       bool vsaturate, fsaturate;
+
        /* bitmask of sampler which needs coords clamped for vertex
         * shader:
         */
index ccedb391fed32c195b689954c7a37f6c9066c530..7cc24e598e290ff2d8e9b1cada204e24709cabb7 100644 (file)
@@ -82,18 +82,20 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
        struct fd3_context *fd3_ctx = fd3_context(ctx);
        struct ir3_shader_key *last_key = &fd3_ctx->last_key;
 
-       if (memcmp(last_key, key, sizeof(*key))) {
+       if (!ir3_shader_key_equal(last_key, key)) {
                ctx->dirty |= FD_DIRTY_PROG;
 
-               if ((last_key->vsaturate_s != key->vsaturate_s) ||
-                               (last_key->vsaturate_t != key->vsaturate_t) ||
-                               (last_key->vsaturate_r != key->vsaturate_r))
-                       ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
+               if (last_key->has_per_samp || key->has_per_samp) {
+                       if ((last_key->vsaturate_s != key->vsaturate_s) ||
+                                       (last_key->vsaturate_t != key->vsaturate_t) ||
+                                       (last_key->vsaturate_r != key->vsaturate_r))
+                               ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
 
-               if ((last_key->fsaturate_s != key->fsaturate_s) ||
-                               (last_key->fsaturate_t != key->fsaturate_t) ||
-                               (last_key->fsaturate_r != key->fsaturate_r))
-                       ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+                       if ((last_key->fsaturate_s != key->fsaturate_s) ||
+                                       (last_key->fsaturate_t != key->fsaturate_t) ||
+                                       (last_key->fsaturate_r != key->fsaturate_r))
+                               ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+               }
 
                if (last_key->color_two_side != key->color_two_side)
                        ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
@@ -124,6 +126,7 @@ fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
                        // TODO set .half_precision based on render target format,
                        // ie. float16 and smaller use half, float32 use full..
                        .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
+                       .has_per_samp = fd3_ctx->fsaturate || fd3_ctx->vsaturate,
                        .vsaturate_s = fd3_ctx->vsaturate_s,
                        .vsaturate_t = fd3_ctx->vsaturate_t,
                        .vsaturate_r = fd3_ctx->vsaturate_r,
index d70b39e21142865a25b3f77c3728005ecf40992e..39befef7672434cc8517e368b823aaeb3746ea87 100644 (file)
@@ -143,7 +143,7 @@ fd3_sampler_states_bind(struct pipe_context *pctx,
 {
        struct fd_context *ctx = fd_context(pctx);
        struct fd3_context *fd3_ctx = fd3_context(ctx);
-       unsigned saturate_s = 0, saturate_t = 0, saturate_r = 0;
+       uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0;
        unsigned i;
 
        for (i = 0; i < nr; i++) {
@@ -162,10 +162,18 @@ fd3_sampler_states_bind(struct pipe_context *pctx,
        fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
 
        if (shader == PIPE_SHADER_FRAGMENT) {
+               fd3_ctx->fsaturate =
+                       (saturate_s != 0) ||
+                       (saturate_t != 0) ||
+                       (saturate_r != 0);
                fd3_ctx->fsaturate_s = saturate_s;
                fd3_ctx->fsaturate_t = saturate_t;
                fd3_ctx->fsaturate_r = saturate_r;
        } else if (shader == PIPE_SHADER_VERTEX) {
+               fd3_ctx->vsaturate =
+                       (saturate_s != 0) ||
+                       (saturate_t != 0) ||
+                       (saturate_r != 0);
                fd3_ctx->vsaturate_s = saturate_s;
                fd3_ctx->vsaturate_t = saturate_t;
                fd3_ctx->vsaturate_r = saturate_r;
index c77cec10cc7f7d19b60a983fef68d053a562c587..1f7e869d9f3f4c60adf1519e49fa48c9f3a2c4cd 100644 (file)
@@ -182,23 +182,30 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
         * so normalize the key to avoid constructing multiple identical
         * variants:
         */
-       if (shader->type == SHADER_FRAGMENT) {
+       switch (shader->type) {
+       case SHADER_FRAGMENT:
+       case SHADER_COMPUTE:
                key.binning_pass = false;
-               key.vsaturate_s = 0;
-               key.vsaturate_t = 0;
-               key.vsaturate_r = 0;
-       }
-       if (shader->type == SHADER_VERTEX) {
+               if (key.has_per_samp) {
+                       key.vsaturate_s = 0;
+                       key.vsaturate_t = 0;
+                       key.vsaturate_r = 0;
+               }
+               break;
+       case SHADER_VERTEX:
                key.color_two_side = false;
                key.half_precision = false;
                key.alpha = false;
-               key.fsaturate_s = 0;
-               key.fsaturate_t = 0;
-               key.fsaturate_r = 0;
+               if (key.has_per_samp) {
+                       key.fsaturate_s = 0;
+                       key.fsaturate_t = 0;
+                       key.fsaturate_r = 0;
+               }
+               break;
        }
 
        for (v = shader->variants; v; v = v->next)
-               if (!memcmp(&key, &v->key, sizeof(key)))
+               if (ir3_shader_key_equal(&key, &v->key))
                        return v;
 
        /* compile new variant if it doesn't exist already: */
index c531ad704cc020be5b6c2640cccc322b2d082961..628c09e1be3f8b963da086fcd46397bdf20cfde8 100644 (file)
@@ -54,36 +54,54 @@ static inline uint16_t sem2idx(ir3_semantic sem)
  * in hw (two sided color), binning-pass vertex shader, etc.
  */
 struct ir3_shader_key {
+       union {
+               struct {
+                       /* do we need to check {v,f}saturate_{s,t,r}? */
+                       unsigned has_per_samp : 1;
+
+                       /*
+                        * Vertex shader variant parameters:
+                        */
+                       unsigned binning_pass : 1;
+
+                       /*
+                        * Fragment shader variant parameters:
+                        */
+                       unsigned color_two_side : 1;
+                       unsigned half_precision : 1;
+                       /* For rendering to alpha, we need a bit of special handling
+                        * since the hw always takes gl_FragColor starting from x
+                        * component, rather than figuring out to take the w component.
+                        * We could be more clever and generate variants for other
+                        * render target formats (ie. luminance formats are xxx1), but
+                        * let's start with this and see how it goes:
+                        */
+                       unsigned alpha : 1;
+               };
+               uint32_t global;
+       };
+
        /* bitmask of sampler which needs coords clamped for vertex
         * shader:
         */
-       unsigned vsaturate_s, vsaturate_t, vsaturate_r;
+       uint16_t vsaturate_s, vsaturate_t, vsaturate_r;
 
        /* bitmask of sampler which needs coords clamped for frag
         * shader:
         */
-       unsigned fsaturate_s, fsaturate_t, fsaturate_r;
-
-       /*
-        * Vertex shader variant parameters:
-        */
-       unsigned binning_pass : 1;
+       uint16_t fsaturate_s, fsaturate_t, fsaturate_r;
 
-       /*
-        * Fragment shader variant parameters:
-        */
-       unsigned color_two_side : 1;
-       unsigned half_precision : 1;
-       /* For rendering to alpha, we need a bit of special handling
-        * since the hw always takes gl_FragColor starting from x
-        * component, rather than figuring out to take the w component.
-        * We could be more clever and generate variants for other
-        * render target formats (ie. luminance formats are xxx1), but
-        * let's start with this and see how it goes:
-        */
-       unsigned alpha : 1;
 };
 
+static inline bool
+ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b)
+{
+       /* slow-path if we need to check {v,f}saturate_{s,t,r} */
+       if (a->has_per_samp || b->has_per_samp)
+               return memcmp(a, b, sizeof(struct ir3_shader_key)) == 0;
+       return a->global == b->global;
+}
+
 struct ir3_shader_variant {
        struct fd_bo *bo;