freedreno/a4xx: only update/emit bordercolor state when needed
author: Rob Clark <robclark@freedesktop.org>
Wed, 1 Jun 2016 16:23:58 +0000 (12:23 -0400)
committer: Rob Clark <robclark@freedesktop.org>
Thu, 2 Jun 2016 19:44:07 +0000 (15:44 -0400)
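I noticed in stk that unconditionally uploading and emitting the border-color state on every texture emit was contributing a lot of overhead. Only allocate, fill, and emit the border-color buffer when at least one bound sampler uses CLAMP_TO_BORDER wrapping.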

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a4xx/fd4_emit.c
src/gallium/drivers/freedreno/a4xx/fd4_texture.c
src/gallium/drivers/freedreno/a4xx/fd4_texture.h

index 00e985d27e5407aaf8c8784e09cd484406a1586b..7d602fc6c92c6116e011808dfb7b5e46444d2f8a 100644 (file)
@@ -131,16 +131,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        [SB_FRAG_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
        };
        struct fd4_context *fd4_ctx = fd4_context(ctx);
-       unsigned i, off;
-       void *ptr;
-
-       u_upload_alloc(fd4_ctx->border_color_uploader,
-                       0, BORDER_COLOR_UPLOAD_SIZE,
-                      BORDER_COLOR_UPLOAD_SIZE, &off,
-                       &fd4_ctx->border_color_buf,
-                       &ptr);
-
-       fd_setup_border_colors(tex, ptr, 0);
+       bool needs_border = false;
+       unsigned i;
 
        if (tex->num_samplers > 0) {
                int num_samplers;
@@ -166,6 +158,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
                                        &dummy_sampler;
                        OUT_RING(ring, sampler->texsamp0);
                        OUT_RING(ring, sampler->texsamp1);
+
+                       needs_border |= sampler->needs_border;
                }
 
                for (; i < num_samplers; i++) {
@@ -235,10 +229,22 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
                debug_assert(v->astc_srgb.count == 0);
        }
 
-       OUT_PKT0(ring, bcolor_reg[sb], 1);
-       OUT_RELOC(ring, fd_resource(fd4_ctx->border_color_buf)->bo, off, 0, 0);
+       if (needs_border) {
+               unsigned off;
+               void *ptr;
 
-       u_upload_unmap(fd4_ctx->border_color_uploader);
+               u_upload_alloc(fd4_ctx->border_color_uploader,
+                               0, BORDER_COLOR_UPLOAD_SIZE,
+                               BORDER_COLOR_UPLOAD_SIZE, &off,
+                               &fd4_ctx->border_color_buf,
+                               &ptr);
+
+               fd_setup_border_colors(tex, ptr, 0);
+               OUT_PKT0(ring, bcolor_reg[sb], 1);
+               OUT_RELOC(ring, fd_resource(fd4_ctx->border_color_buf)->bo, off, 0, 0);
+
+               u_upload_unmap(fd4_ctx->border_color_uploader);
+       }
 }
 
 /* emit texture state for mem->gmem restore operation.. eventually it would
index 6d9ecb7da24316818f26e32f26bfd8436114c1fb..da8c6814b9a47351e2d5454be03c2ca6185b74c9 100644 (file)
@@ -36,7 +36,7 @@
 #include "fd4_format.h"
 
 static enum a4xx_tex_clamp
-tex_clamp(unsigned wrap, bool clamp_to_edge)
+tex_clamp(unsigned wrap, bool clamp_to_edge, bool *needs_border)
 {
        /* Hardware does not support _CLAMP, but we emulate it: */
        if (wrap == PIPE_TEX_WRAP_CLAMP) {
@@ -50,6 +50,7 @@ tex_clamp(unsigned wrap, bool clamp_to_edge)
        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
                return A4XX_TEX_CLAMP_TO_EDGE;
        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+               *needs_border = true;
                return A4XX_TEX_CLAMP_TO_BORDER;
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
                /* only works for PoT.. need to emulate otherwise! */
@@ -113,14 +114,15 @@ fd4_sampler_state_create(struct pipe_context *pctx,
                so->saturate_r = (cso->wrap_r == PIPE_TEX_WRAP_CLAMP);
        }
 
+       so->needs_border = false;
        so->texsamp0 =
                COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
                A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
                A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
                A4XX_TEX_SAMP_0_ANISO(aniso) |
-               A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge)) |
-               A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge)) |
-               A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge));
+               A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge, &so->needs_border)) |
+               A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge, &so->needs_border)) |
+               A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge, &so->needs_border));
 
        so->texsamp1 =
 //             COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
index 21ceadd8c63b547d8858042c9af83320e2947de3..a16c4f9d187b15f1f65cea74637b8fb1fed644fe 100644 (file)
@@ -41,6 +41,7 @@ struct fd4_sampler_stateobj {
        struct pipe_sampler_state base;
        uint32_t texsamp0, texsamp1;
        bool saturate_s, saturate_t, saturate_r;
+       bool needs_border;
 };
 
 static inline struct fd4_sampler_stateobj *