freedreno/a6xx: texture state obj
authorRob Clark <robdclark@gmail.com>
Wed, 10 Oct 2018 19:59:29 +0000 (15:59 -0400)
committerRob Clark <robdclark@gmail.com>
Wed, 17 Oct 2018 16:44:48 +0000 (12:44 -0400)
Unfortunately gallium doesn't perfectly match what the hw wants here, since
it uses a separate CSO for each texture/sampler.  So we have to use a hash
table to map the collection of textures/samplers to a hw state object.

We probably could use separate hw state objects for texture and sampler
state, but mesa/st tends to update the tex and samp state together.

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/a6xx/fd6_context.c
src/gallium/drivers/freedreno/a6xx/fd6_context.h
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.h
src/gallium/drivers/freedreno/a6xx/fd6_texture.c
src/gallium/drivers/freedreno/a6xx/fd6_texture.h

index b82889c7a71a051dbad33af4aac5588412ac74ad..ab10ccb113b8901936d36046aab86d4be5b37fa2 100644 (file)
@@ -56,6 +56,8 @@ fd6_context_destroy(struct pipe_context *pctx)
 
        fd_context_cleanup_common_vbos(&fd6_ctx->base);
 
+       fd6_texture_fini(pctx);
+
        free(fd6_ctx);
 }
 
index 30cc26001cdc73a098d388cc18223a1f104f2a51..85245c8a65f196aa0a2548e3af3d655b186e8e98 100644 (file)
@@ -105,6 +105,9 @@ struct fd6_context {
        /*{*/
        struct fd6_streamout_state tf;
        /*}*/
+
+       uint16_t tex_seqno;
+       struct hash_table *tex_cache;
 };
 
 static inline struct fd6_context *
index 93f6a267fa98595e2c463e5fb7446532a802b008..eb24fb96cfba89e185ab0a4298dea4879bd453cc 100644 (file)
@@ -325,32 +325,32 @@ emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring)
        u_upload_unmap(fd6_ctx->border_color_uploader);
 }
 
-static bool
-emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
-               enum a6xx_state_block sb, struct fd_texture_stateobj *tex)
+bool
+fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
+               enum a6xx_state_block sb, struct fd_texture_stateobj *tex,
+               unsigned bcolor_offset)
 {
        bool needs_border = false;
-       unsigned bcolor_offset;
-       unsigned opcode, tex_samp_reg, tex_const_reg;
+       unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
 
        switch (sb) {
        case SB6_VS_TEX:
                opcode = CP_LOAD_STATE6_GEOM;
-               bcolor_offset = 0;
                tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP_LO;
                tex_const_reg = REG_A6XX_SP_VS_TEX_CONST_LO;
+               tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT;
                break;
        case SB6_FS_TEX:
                opcode = CP_LOAD_STATE6_FRAG;
-               bcolor_offset = ctx->tex[PIPE_SHADER_VERTEX].num_samplers;
                tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP_LO;
                tex_const_reg = REG_A6XX_SP_FS_TEX_CONST_LO;
+               tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT;
                break;
        case SB6_CS_TEX:
                opcode = CP_LOAD_STATE6_FRAG;
-               bcolor_offset = 0;
                tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP_LO;
                tex_const_reg = REG_A6XX_SP_CS_TEX_CONST_LO;
+               tex_count_reg = 0; //REG_A6XX_SP_CS_TEX_COUNT;
                break;
        default:
                unreachable("bad state block");
@@ -359,8 +359,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
        if (tex->num_samplers > 0) {
                struct fd_ringbuffer *state =
-                       fd_ringbuffer_new_flags(ctx->pipe, tex->num_samplers * 4 * 4,
-                                       FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING);
+                       fd_ringbuffer_new_flags(pipe, tex->num_samplers * 4 * 4,
+                                       FD_RINGBUFFER_OBJECT);
                for (unsigned i = 0; i < tex->num_samplers; i++) {
                        static const struct fd6_sampler_stateobj dummy_sampler = {};
                        const struct fd6_sampler_stateobj *sampler = tex->samplers[i] ?
@@ -390,8 +390,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
        if (tex->num_textures > 0) {
                struct fd_ringbuffer *state =
-                       fd_ringbuffer_new_flags(ctx->pipe, tex->num_textures * 16 * 4,
-                                       FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING);
+                       fd_ringbuffer_new_flags(pipe, tex->num_textures * 16 * 4,
+                                       FD_RINGBUFFER_OBJECT);
                for (unsigned i = 0; i < tex->num_textures; i++) {
                        static const struct fd6_pipe_sampler_view dummy_view = {};
                        const struct fd6_pipe_sampler_view *view = tex->textures[i] ?
@@ -445,6 +445,11 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
                fd_ringbuffer_del(state);
        }
 
+       if (tex_count_reg) {
+               OUT_PKT4(ring, tex_count_reg, 1);
+               OUT_RING(ring, tex->num_textures);
+       }
+
        return needs_border;
 }
 
@@ -931,28 +936,25 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                OUT_RING(ring, A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
        }
 
-       if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) {
-               needs_border |= emit_textures(ctx, ring, SB6_VS_TEX,
-                               &ctx->tex[PIPE_SHADER_VERTEX]);
-               OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1);
-               OUT_RING(ring, ctx->tex[PIPE_SHADER_VERTEX].num_textures);
-       }
+       if ((ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) &&
+                       ctx->tex[PIPE_SHADER_VERTEX].num_textures > 0) {
+               struct fd6_texture_state *tex = fd6_texture_state(ctx,
+                               SB6_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX]);
+
+               needs_border |= tex->needs_border;
 
-       if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) {
-               needs_border |= emit_textures(ctx, ring, SB6_FS_TEX,
-                               &ctx->tex[PIPE_SHADER_FRAGMENT]);
-               OUT_PKT4(ring, REG_A6XX_SP_FS_TEX_COUNT, 1);
-               OUT_RING(ring, ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
+               fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_VS_TEX, 0x7);
        }
 
-#if 0
-       OUT_PKT4(ring, REG_A6XX_TPL1_FS_TEX_COUNT, 1);
-       OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask ?
-                       ~0 : ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
+       if ((ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) &&
+                       ctx->tex[PIPE_SHADER_FRAGMENT].num_textures > 0) {
+               struct fd6_texture_state *tex = fd6_texture_state(ctx,
+                               SB6_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]);
 
-       OUT_PKT4(ring, REG_A6XX_TPL1_CS_TEX_COUNT, 1);
-       OUT_RING(ring, 0);
-#endif
+               needs_border |= tex->needs_border;
+
+               fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_FS_TEX, 0x7);
+       }
 
        if (needs_border)
                emit_border_color(ctx, ring);
@@ -988,8 +990,8 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
        if (dirty & FD_DIRTY_SHADER_TEX) {
                bool needs_border = false;
-               needs_border |= emit_textures(ctx, ring, SB6_CS_TEX,
-                               &ctx->tex[PIPE_SHADER_COMPUTE]);
+               needs_border |= fd6_emit_textures(ctx->pipe, ring, SB6_CS_TEX,
+                               &ctx->tex[PIPE_SHADER_COMPUTE], 0);
 
                if (needs_border)
                        emit_border_color(ctx, ring);
index 4e27597a70b91ef9466396f693e8d0d4bd680230..005952750f8dd57879bbcf8c618263f789900457 100644 (file)
@@ -45,6 +45,8 @@ struct fd_ringbuffer;
 enum fd6_state_id {
        FD6_GROUP_VS_CONST,
        FD6_GROUP_FS_CONST,
+       FD6_GROUP_VS_TEX,
+       FD6_GROUP_FS_TEX,
 };
 
 struct fd6_state_group {
@@ -174,6 +176,10 @@ fd6_stage2shadersb(enum shader_t type)
        }
 }
 
+bool fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
+               enum a6xx_state_block sb, struct fd_texture_stateobj *tex,
+               unsigned bcolor_offset);
+
 void fd6_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd6_emit *emit);
 
 void fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
index 0f342ae8e2843177b78f03eb57d0811d01cc121e..a48c4ee1ad0b4ff12e05ae06a692283bd01cd5aa 100644 (file)
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "util/hash_table.h"
 
 #include "fd6_texture.h"
 #include "fd6_format.h"
+#include "fd6_emit.h"
+
+static void fd6_texture_state_destroy(struct fd6_texture_state *state);
 
 static enum a6xx_tex_clamp
 tex_clamp(unsigned wrap, bool clamp_to_edge, bool *needs_border)
@@ -94,6 +98,7 @@ fd6_sampler_state_create(struct pipe_context *pctx,
                return NULL;
 
        so->base = *cso;
+       so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno;
 
        if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
                miplinear = true;
@@ -140,6 +145,28 @@ fd6_sampler_state_create(struct pipe_context *pctx,
        return so;
 }
 
+static void
+fd6_sampler_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
+       struct fd6_sampler_stateobj *samp = hwcso;
+
+       struct hash_entry *entry;
+       hash_table_foreach(fd6_ctx->tex_cache, entry) {
+               struct fd6_texture_state *state = entry->data;
+
+               for (unsigned i = 0; i < ARRAY_SIZE(state->key.samp); i++) {
+                       if (samp->seqno == state->key.samp[i].seqno) {
+                               fd6_texture_state_destroy(entry->data);
+                               _mesa_hash_table_remove(fd6_ctx->tex_cache, entry);
+                               break;
+                       }
+               }
+       }
+
+       free(hwcso);
+}
+
 static void
 fd6_sampler_states_bind(struct pipe_context *pctx,
                enum pipe_shader_type shader, unsigned start,
@@ -215,6 +242,7 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
        so->base.texture = prsc;
        so->base.reference.count = 1;
        so->base.context = pctx;
+       so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno;
 
        so->texconst0 =
                A6XX_TEX_CONST_0_FMT(fd6_pipe2tex(format)) |
@@ -309,6 +337,31 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
        return &so->base;
 }
 
+static void
+fd6_sampler_view_destroy(struct pipe_context *pctx,
+               struct pipe_sampler_view *_view)
+{
+       struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
+       struct fd6_pipe_sampler_view *view = fd6_pipe_sampler_view(_view);
+
+       struct hash_entry *entry;
+       hash_table_foreach(fd6_ctx->tex_cache, entry) {
+               struct fd6_texture_state *state = entry->data;
+
+               for (unsigned i = 0; i < ARRAY_SIZE(state->key.view); i++) {
+                       if (view->seqno == state->key.view[i].seqno) {
+                               fd6_texture_state_destroy(entry->data);
+                               _mesa_hash_table_remove(fd6_ctx->tex_cache, entry);
+                               break;
+                       }
+               }
+       }
+
+       pipe_resource_reference(&view->base.texture, NULL);
+
+       free(view);
+}
+
 static void
 fd6_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
                unsigned start, unsigned nr,
@@ -337,11 +390,127 @@ fd6_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
        }
 }
 
+
+static uint32_t
+key_hash(const void *_key)
+{
+       const struct fd6_texture_key *key = _key;
+       uint32_t hash = _mesa_fnv32_1a_offset_bias;
+       hash = _mesa_fnv32_1a_accumulate_block(hash, key, sizeof(*key));
+       return hash;
+}
+
+static bool
+key_equals(const void *_a, const void *_b)
+{
+       const struct fd6_texture_key *a = _a;
+       const struct fd6_texture_key *b = _b;
+       return memcmp(a, b, sizeof(struct fd6_texture_key)) == 0;
+}
+
+struct fd6_texture_state *
+fd6_texture_state(struct fd_context *ctx, enum a6xx_state_block sb,
+               struct fd_texture_stateobj *tex)
+{
+       struct fd6_context *fd6_ctx = fd6_context(ctx);
+       struct fd6_texture_key key;
+       bool needs_border = false;
+
+       memset(&key, 0, sizeof(key));
+
+       for (unsigned i = 0; i < tex->num_textures; i++) {
+               if (!tex->textures[i])
+                       continue;
+
+               struct fd6_pipe_sampler_view *view =
+                       fd6_pipe_sampler_view(tex->textures[i]);
+
+               key.view[i].rsc_seqno = fd_resource(view->base.texture)->seqno;
+               key.view[i].seqno = view->seqno;
+       }
+
+       for (unsigned i = 0; i < tex->num_samplers; i++) {
+               if (!tex->samplers[i])
+                       continue;
+
+               struct fd6_sampler_stateobj *sampler =
+                       fd6_sampler_stateobj(tex->samplers[i]);
+
+               key.samp[i].seqno = sampler->seqno;
+
+               needs_border |= sampler->needs_border;
+       }
+
+       /* This will need updating for HS/DS/GS: */
+       if (unlikely(needs_border && (sb == SB6_FS_TEX))) {
+               /* TODO we could probably use fixed offsets for each shader
+                * stage and avoid the need for # of VS samplers to be part
+                * of the FS tex state.. but I don't think our handling of
+                * BCOLOR_OFFSET is actually correct, and trying to use a
+                * hard coded offset of 16 breaks things.
+                *
+                * Note that when this changes, then a corresponding change
+                * in emit_border_color() is also needed.
+                */
+               key.bcolor_offset = ctx->tex[PIPE_SHADER_VERTEX].num_samplers;
+       }
+
+       uint32_t hash = key_hash(&key);
+       struct hash_entry *entry =
+               _mesa_hash_table_search_pre_hashed(fd6_ctx->tex_cache, hash, &key);
+
+       if (entry) {
+               return entry->data;
+       }
+
+       struct fd6_texture_state *state = CALLOC_STRUCT(fd6_texture_state);
+
+       state->key = key;
+       state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
+       state->needs_border = needs_border;
+
+       fd6_emit_textures(ctx->pipe, state->stateobj, sb, tex, key.bcolor_offset);
+
+       /* NOTE: insert using the copy of the key stored in the state obj,
+        * because the local 'key' lives on the stack and goes away on return
+        */
+       _mesa_hash_table_insert_pre_hashed(fd6_ctx->tex_cache, hash,
+                       &state->key, state);
+
+       return state;
+}
+
+static void
+fd6_texture_state_destroy(struct fd6_texture_state *state)
+{
+       fd_ringbuffer_del(state->stateobj);
+       free(state);
+}
+
 void
 fd6_texture_init(struct pipe_context *pctx)
 {
+       struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
+
        pctx->create_sampler_state = fd6_sampler_state_create;
+       pctx->delete_sampler_state = fd6_sampler_state_delete;
        pctx->bind_sampler_states = fd6_sampler_states_bind;
+
        pctx->create_sampler_view = fd6_sampler_view_create;
+       pctx->sampler_view_destroy = fd6_sampler_view_destroy;
        pctx->set_sampler_views = fd6_set_sampler_views;
+
+       fd6_ctx->tex_cache = _mesa_hash_table_create(NULL, key_hash, key_equals);
+}
+
+void
+fd6_texture_fini(struct pipe_context *pctx)
+{
+       struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
+
+       struct hash_entry *entry;
+       hash_table_foreach(fd6_ctx->tex_cache, entry) {
+               fd6_texture_state_destroy(entry->data);
+       }
+       ralloc_free(fd6_ctx->tex_cache);
 }
index a45ed6b3a7be68911d7db6706fd47ef4110b0dce..576afaafdb49f6a22c34e2c66e41213ae06e39bc 100644 (file)
@@ -41,6 +41,7 @@ struct fd6_sampler_stateobj {
        uint32_t texsamp0, texsamp1, texsamp2, texsamp3;
        bool saturate_s, saturate_t, saturate_r;
        bool needs_border;
+       uint16_t seqno;
 };
 
 static inline struct fd6_sampler_stateobj *
@@ -55,6 +56,7 @@ struct fd6_pipe_sampler_view {
        uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11;
        uint32_t offset;
        bool astc_srgb;
+       uint16_t seqno;
 };
 
 static inline struct fd6_pipe_sampler_view *
@@ -64,7 +66,7 @@ fd6_pipe_sampler_view(struct pipe_sampler_view *pview)
 }
 
 void fd6_texture_init(struct pipe_context *pctx);
-
+void fd6_texture_fini(struct pipe_context *pctx);
 
 static inline enum a6xx_tex_type
 fd6_tex_type(unsigned target)
@@ -88,4 +90,38 @@ fd6_tex_type(unsigned target)
        }
 }
 
+/*
+ * Texture stateobj:
+ *
+ * The sampler and sampler-view state is mapped to a single hardware
+ * stateobj which can be emitted as a pointer in a CP_SET_DRAW_STATE
+ * packet, to avoid the overhead of re-generating the entire cmdstream
+ * when the application toggles through multiple different texture states.
+ */
+
+struct fd6_texture_key {
+       struct {
+               /* We need to track the seqno of the rsc as well as of the
+                * sampler view, because resource shadowing/etc can result
+                * in the underlying bo changing (which means the previous
+                * state is no longer valid).
+                */
+               uint16_t rsc_seqno;
+               uint16_t seqno;
+       } view[16];
+       struct {
+               uint16_t seqno;
+       } samp[16];
+       uint8_t bcolor_offset;
+};
+
+struct fd6_texture_state {
+       struct fd6_texture_key key;
+       struct fd_ringbuffer *stateobj;
+       bool needs_border;
+};
+
+struct fd6_texture_state * fd6_texture_state(struct fd_context *ctx,
+               enum a6xx_state_block sb, struct fd_texture_stateobj *tex);
+
 #endif /* FD6_TEXTURE_H_ */