vc4: Add shader variant caching to handle FS output swizzle.
authorEric Anholt <eric@anholt.net>
Tue, 1 Jul 2014 21:42:42 +0000 (14:42 -0700)
committerEric Anholt <eric@anholt.net>
Sat, 9 Aug 2014 01:59:46 +0000 (18:59 -0700)
src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_draw.c
src/gallium/drivers/vc4/vc4_program.c

index dedd98b4e5bcf28a5acae8035225fa957a29c503..8258d3046c0984cf4efd93d66ae40aaaf4c0fa49 100644 (file)
@@ -72,6 +72,9 @@ struct vc4_shader_uniform_info {
 
 struct vc4_shader_state {
         struct pipe_shader_state base;
+};
+
+struct vc4_compiled_shader {
         struct vc4_bo *bo;
 
         struct vc4_shader_uniform_info uniforms[2];
@@ -80,7 +83,8 @@ struct vc4_shader_state {
 };
 
 struct vc4_program_stateobj {
-        struct vc4_shader_state *vs, *fs;
+        struct vc4_shader_state *bind_vs, *bind_fs;
+        struct vc4_compiled_shader *vs, *fs;
         uint32_t dirty;
         uint8_t num_exports;
         /* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index
@@ -138,6 +142,8 @@ struct vc4_context {
 
         struct primconvert_context *primconvert;
 
+        struct util_hash_table *fs_cache, *vs_cache;
+
         /** @{ Current pipeline state objects */
         struct pipe_scissor_state scissor;
         struct pipe_blend_state *blend;
@@ -188,7 +194,7 @@ void vc4_simulator_flush(struct vc4_context *vc4,
 void *vc4_simulator_alloc(struct vc4_screen *screen, uint32_t size);
 
 void vc4_get_uniform_bo(struct vc4_context *vc4,
-                        struct vc4_shader_state *shader,
+                        struct vc4_compiled_shader *shader,
                         struct vc4_constbuf_stateobj *cb,
                         int shader_index, struct vc4_bo **out_bo,
                         uint32_t *out_offset);
@@ -196,5 +202,6 @@ void vc4_get_uniform_bo(struct vc4_context *vc4,
 void vc4_flush(struct pipe_context *pctx);
 void vc4_emit_state(struct pipe_context *pctx);
 void vc4_generate_code(struct qcompile *c);
+void vc4_update_compiled_shaders(struct vc4_context *vc4);
 
 #endif /* VC4_CONTEXT_H */
index 1d7956d1d4fa7eb92f1f142f563bdc046a4200bd..10b5deb560e1a92a9c27b18a051ed615bb33adad 100644 (file)
@@ -98,6 +98,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                                                "tile_state");
         }
 
+        vc4_update_compiled_shaders(vc4);
+
         vc4->needs_flush = true;
 
         //   Tile state data is 48 bytes per tile, I think it can be thrown away
index 8d71212a69fb518f5cacafa108c5344e50333b9a..bc66ecc3446f82395e19d60cb7a7ca175397aada 100644 (file)
@@ -25,6 +25,9 @@
 #include <stdio.h>
 #include <inttypes.h>
 #include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_hash_table.h"
+#include "util/u_hash.h"
 #include "util/u_memory.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_dump.h"
@@ -43,11 +46,29 @@ struct tgsi_to_qir {
         struct qreg *consts;
         uint32_t num_consts;
 
+        struct vc4_shader_state *shader_state;
+        struct vc4_fs_key *fs_key;
+        struct vc4_vs_key *vs_key;
+
         uint32_t *uniform_data;
         enum quniform_contents *uniform_contents;
         uint32_t num_uniforms;
 };
 
+struct vc4_key {
+        struct vc4_shader_state *shader_state;
+};
+
+struct vc4_fs_key {
+        struct vc4_key base;
+        enum pipe_format color_format;
+};
+
+struct vc4_vs_key {
+        struct vc4_key base;
+        enum pipe_format attr_formats[8];
+};
+
 static struct qreg
 get_temp_for_uniform(struct tgsi_to_qir *trans, uint32_t uniform)
 {
@@ -323,7 +344,7 @@ parse_tgsi_immediate(struct tgsi_to_qir *trans, struct tgsi_full_immediate *imm)
 }
 
 static void
-emit_frag_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_frag_init(struct tgsi_to_qir *trans)
 {
         /* XXX: lols */
         for (int i = 0; i < 4; i++) {
@@ -333,7 +354,7 @@ emit_frag_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
 }
 
 static void
-emit_vert_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_vert_init(struct tgsi_to_qir *trans)
 {
         struct qcompile *c = trans->c;
 
@@ -346,7 +367,7 @@ emit_vert_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
 }
 
 static void
-emit_coord_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_coord_init(struct tgsi_to_qir *trans)
 {
         struct qcompile *c = trans->c;
 
@@ -359,16 +380,27 @@ emit_coord_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
 }
 
 static void
-emit_frag_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_frag_end(struct tgsi_to_qir *trans)
 {
         struct qcompile *c = trans->c;
 
         struct qreg t = qir_get_temp(c);
+
+        const struct util_format_description *format_desc =
+                util_format_description(trans->fs_key->color_format);
+
+        struct qreg swizzled_outputs[4] = {
+                trans->outputs[format_desc->swizzle[0]],
+                trans->outputs[format_desc->swizzle[1]],
+                trans->outputs[format_desc->swizzle[2]],
+                trans->outputs[format_desc->swizzle[3]],
+        };
+
         qir_emit(c, qir_inst4(QOP_PACK_COLORS, t,
-                              trans->outputs[0],
-                              trans->outputs[1],
-                              trans->outputs[2],
-                              trans->outputs[3]));
+                              swizzled_outputs[0],
+                              swizzled_outputs[1],
+                              swizzled_outputs[2],
+                              swizzled_outputs[3]));
         qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
                              t, c->undef));
 }
@@ -409,7 +441,7 @@ emit_1_wc_write(struct tgsi_to_qir *trans)
 }
 
 static void
-emit_vert_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_vert_end(struct tgsi_to_qir *trans)
 {
         emit_scaled_viewport_write(trans);
         emit_zs_write(trans);
@@ -418,7 +450,7 @@ emit_vert_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
 }
 
 static void
-emit_coord_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_coord_end(struct tgsi_to_qir *trans)
 {
         struct qcompile *c = trans->c;
 
@@ -431,7 +463,8 @@ emit_coord_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
 }
 
 static struct tgsi_to_qir *
-vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage)
+vc4_shader_tgsi_to_qir(struct vc4_compiled_shader *shader, enum qstage stage,
+                       struct vc4_key *key)
 {
         struct tgsi_to_qir *trans = CALLOC_STRUCT(tgsi_to_qir);
         struct qcompile *c;
@@ -451,24 +484,28 @@ vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage)
         trans->uniform_data = calloc(sizeof(uint32_t), 1024);
         trans->uniform_contents = calloc(sizeof(enum quniform_contents), 1024);
 
+        trans->shader_state = key->shader_state;
         trans->c = c;
-        ret = tgsi_parse_init(&trans->parser, so->base.tokens);
+        ret = tgsi_parse_init(&trans->parser, trans->shader_state->base.tokens);
         assert(ret == TGSI_PARSE_OK);
 
         if (vc4_debug & VC4_DEBUG_TGSI) {
                 fprintf(stderr, "TGSI:\n");
-                tgsi_dump(so->base.tokens, 0);
+                tgsi_dump(trans->shader_state->base.tokens, 0);
         }
 
         switch (stage) {
         case QSTAGE_FRAG:
-                emit_frag_init(trans, so);
+                trans->fs_key = (struct vc4_fs_key *)key;
+                emit_frag_init(trans);
                 break;
         case QSTAGE_VERT:
-                emit_vert_init(trans, so);
+                trans->vs_key = (struct vc4_vs_key *)key;
+                emit_vert_init(trans);
                 break;
         case QSTAGE_COORD:
-                emit_coord_init(trans, so);
+                trans->vs_key = (struct vc4_vs_key *)key;
+                emit_coord_init(trans);
                 break;
         }
 
@@ -490,13 +527,13 @@ vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage)
 
         switch (stage) {
         case QSTAGE_FRAG:
-                emit_frag_end(trans, so);
+                emit_frag_end(trans);
                 break;
         case QSTAGE_VERT:
-                emit_vert_end(trans, so);
+                emit_vert_end(trans);
                 break;
         case QSTAGE_COORD:
-                emit_coord_end(trans, so);
+                emit_coord_end(trans);
                 break;
         }
 
@@ -520,7 +557,7 @@ vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage)
         return trans;
 }
 
-static struct vc4_shader_state *
+static void *
 vc4_shader_state_create(struct pipe_context *pctx,
                         const struct pipe_shader_state *cso)
 {
@@ -534,12 +571,12 @@ vc4_shader_state_create(struct pipe_context *pctx,
 }
 
 static void
-copy_uniform_state_to_shader(struct vc4_shader_state *so,
+copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
                              int shader_index,
                              struct tgsi_to_qir *trans)
 {
         int count = trans->num_uniforms;
-        struct vc4_shader_uniform_info *uinfo = &so->uniforms[shader_index];
+        struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
 
         uinfo->count = count;
         uinfo->data = malloc(count * sizeof(*uinfo->data));
@@ -550,71 +587,187 @@ copy_uniform_state_to_shader(struct vc4_shader_state *so,
                count * sizeof(*uinfo->contents));
 }
 
-static void *
-vc4_fs_state_create(struct pipe_context *pctx,
-                    const struct pipe_shader_state *cso)
+static void
+vc4_fs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+               struct vc4_fs_key *key)
 {
-        struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_shader_state *so = vc4_shader_state_create(pctx, cso);
-        if (!so)
-                return NULL;
-
-        struct tgsi_to_qir *trans = vc4_shader_tgsi_to_qir(so, QSTAGE_FRAG);
-        copy_uniform_state_to_shader(so, 0, trans);
+        struct tgsi_to_qir *trans = vc4_shader_tgsi_to_qir(shader, QSTAGE_FRAG,
+                                                           &key->base);
+        copy_uniform_state_to_shader(shader, 0, trans);
 
-        so->bo = vc4_bo_alloc_mem(vc4->screen, trans->c->qpu_insts,
-                                  trans->c->num_qpu_insts * sizeof(uint64_t),
-                                  "fs_code");
+        shader->bo = vc4_bo_alloc_mem(vc4->screen, trans->c->qpu_insts,
+                                      trans->c->num_qpu_insts * sizeof(uint64_t),
+                                      "fs_code");
 
         qir_compile_destroy(trans->c);
         free(trans);
-
-        return so;
 }
 
-static void *
-vc4_vs_state_create(struct pipe_context *pctx,
-                    const struct pipe_shader_state *cso)
+static void
+vc4_vs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+               struct vc4_vs_key *key)
 {
-        struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_shader_state *so = vc4_shader_state_create(pctx, cso);
-        if (!so)
-                return NULL;
-
-        struct tgsi_to_qir *vs_trans = vc4_shader_tgsi_to_qir(so, QSTAGE_VERT);
-        copy_uniform_state_to_shader(so, 0, vs_trans);
+        struct tgsi_to_qir *vs_trans = vc4_shader_tgsi_to_qir(shader,
+                                                              QSTAGE_VERT,
+                                                              &key->base);
+        copy_uniform_state_to_shader(shader, 0, vs_trans);
 
-        struct tgsi_to_qir *cs_trans = vc4_shader_tgsi_to_qir(so, QSTAGE_COORD);
-        copy_uniform_state_to_shader(so, 1, cs_trans);
+        struct tgsi_to_qir *cs_trans = vc4_shader_tgsi_to_qir(shader,
+                                                              QSTAGE_COORD,
+                                                              &key->base);
+        copy_uniform_state_to_shader(shader, 1, cs_trans);
 
         uint32_t vs_size = vs_trans->c->num_qpu_insts * sizeof(uint64_t);
         uint32_t cs_size = cs_trans->c->num_qpu_insts * sizeof(uint64_t);
-        so->coord_shader_offset = vs_size; /* XXX: alignment? */
-        so->bo = vc4_bo_alloc(vc4->screen,
-                              so->coord_shader_offset + cs_size,
-                              "vs_code");
+        shader->coord_shader_offset = vs_size; /* XXX: alignment? */
+        shader->bo = vc4_bo_alloc(vc4->screen,
+                                  shader->coord_shader_offset + cs_size,
+                                  "vs_code");
 
-        void *map = vc4_bo_map(so->bo);
+        void *map = vc4_bo_map(shader->bo);
         memcpy(map, vs_trans->c->qpu_insts, vs_size);
-        memcpy(map + so->coord_shader_offset, cs_trans->c->qpu_insts, cs_size);
+        memcpy(map + shader->coord_shader_offset,
+               cs_trans->c->qpu_insts, cs_size);
 
         qir_compile_destroy(vs_trans->c);
         qir_compile_destroy(cs_trans->c);
+}
 
-        return so;
+static void
+vc4_update_compiled_fs(struct vc4_context *vc4)
+{
+        struct vc4_fs_key local_key;
+        struct vc4_fs_key *key = &local_key;
+
+        memset(key, 0, sizeof(*key));
+        key->base.shader_state = vc4->prog.bind_fs;
+
+        if (vc4->framebuffer.cbufs[0])
+                key->color_format = vc4->framebuffer.cbufs[0]->format;
+
+        vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key);
+        if (vc4->prog.fs)
+                return;
+
+        key = malloc(sizeof(*key));
+        memcpy(key, &local_key, sizeof(*key));
+
+        struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
+        vc4_fs_compile(vc4, shader, key);
+        util_hash_table_set(vc4->fs_cache, key, shader);
+
+        vc4->prog.fs = shader;
+}
+
+static void
+vc4_update_compiled_vs(struct vc4_context *vc4)
+{
+        struct vc4_vs_key local_key;
+        struct vc4_vs_key *key = &local_key;
+
+        memset(key, 0, sizeof(*key));
+        key->base.shader_state = vc4->prog.bind_vs;
+
+        vc4->prog.vs = util_hash_table_get(vc4->vs_cache, key);
+        if (vc4->prog.vs)
+                return;
+
+        key = malloc(sizeof(*key));
+        memcpy(key, &local_key, sizeof(*key));
+
+        struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
+        vc4_vs_compile(vc4, shader, key);
+        util_hash_table_set(vc4->vs_cache, key, shader);
+
+        vc4->prog.vs = shader;
+}
+
+void
+vc4_update_compiled_shaders(struct vc4_context *vc4)
+{
+        vc4_update_compiled_fs(vc4);
+        vc4_update_compiled_vs(vc4);
+}
+
+static unsigned
+fs_cache_hash(void *key)
+{
+        return util_hash_crc32(key, sizeof(struct vc4_fs_key));
+}
+
+static unsigned
+vs_cache_hash(void *key)
+{
+        return util_hash_crc32(key, sizeof(struct vc4_vs_key));
+}
+
+static int
+fs_cache_compare(void *key1, void *key2)
+{
+        return memcmp(key1, key2, sizeof(struct vc4_fs_key));
+}
+
+static int
+vs_cache_compare(void *key1, void *key2)
+{
+        return memcmp(key1, key2, sizeof(struct vc4_vs_key));
+}
+
+struct delete_state {
+        struct vc4_context *vc4;
+        struct vc4_shader_state *shader_state;
+};
+
+static enum pipe_error
+fs_delete_from_cache(void *in_key, void *in_value, void *data)
+{
+        struct delete_state *del = data;
+        struct vc4_fs_key *key = in_key;
+        struct vc4_compiled_shader *shader = in_value;
+
+        if (key->base.shader_state == data) {
+                util_hash_table_remove(del->vc4->fs_cache, key);
+                vc4_bo_unreference(&shader->bo);
+                free(shader);
+        }
+
+        return 0;
+}
+
+static enum pipe_error
+vs_delete_from_cache(void *in_key, void *in_value, void *data)
+{
+        struct delete_state *del = data;
+        struct vc4_vs_key *key = in_key;
+        struct vc4_compiled_shader *shader = in_value;
+
+        if (key->base.shader_state == data) {
+                util_hash_table_remove(del->vc4->vs_cache, key);
+                vc4_bo_unreference(&shader->bo);
+                free(shader);
+        }
+
+        return 0;
 }
 
 static void
 vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
 {
-        struct pipe_shader_state *so = hwcso;
+        struct vc4_context *vc4 = vc4_context(pctx);
+        struct vc4_shader_state *so = hwcso;
+        struct delete_state del;
+
+        del.vc4 = vc4;
+        del.shader_state = so;
+        util_hash_table_foreach(vc4->fs_cache, fs_delete_from_cache, &del);
+        util_hash_table_foreach(vc4->vs_cache, vs_delete_from_cache, &del);
 
-        free((void *)so->tokens);
+        free((void *)so->base.tokens);
         free(so);
 }
 
 void
-vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_shader_state *shader,
+vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                    struct vc4_constbuf_stateobj *cb,
                    int shader_index, struct vc4_bo **out_bo,
                    uint32_t *out_offset)
@@ -653,7 +806,7 @@ static void
 vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        vc4->prog.fs = hwcso;
+        vc4->prog.bind_fs = hwcso;
         vc4->prog.dirty |= VC4_SHADER_DIRTY_FP;
         vc4->dirty |= VC4_DIRTY_PROG;
 }
@@ -662,7 +815,7 @@ static void
 vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        vc4->prog.vs = hwcso;
+        vc4->prog.bind_vs = hwcso;
         vc4->prog.dirty |= VC4_SHADER_DIRTY_VP;
         vc4->dirty |= VC4_DIRTY_PROG;
 }
@@ -670,12 +823,17 @@ vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
 void
 vc4_program_init(struct pipe_context *pctx)
 {
-        pctx->create_vs_state = vc4_vs_state_create;
+        struct vc4_context *vc4 = vc4_context(pctx);
+
+        pctx->create_vs_state = vc4_shader_state_create;
         pctx->delete_vs_state = vc4_shader_state_delete;
 
-        pctx->create_fs_state = vc4_fs_state_create;
+        pctx->create_fs_state = vc4_shader_state_create;
         pctx->delete_fs_state = vc4_shader_state_delete;
 
         pctx->bind_fs_state = vc4_fp_state_bind;
         pctx->bind_vs_state = vc4_vp_state_bind;
+
+        vc4->fs_cache = util_hash_table_create(fs_cache_hash, fs_cache_compare);
+        vc4->vs_cache = util_hash_table_create(vs_cache_hash, vs_cache_compare);
 }