From cf2d777fbe7fd87f9076d4bc0ad639b33e7ffb70 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 1 Jul 2014 14:42:42 -0700 Subject: [PATCH] vc4: Add shader variant caching to handle FS output swizzle. --- src/gallium/drivers/vc4/vc4_context.h | 11 +- src/gallium/drivers/vc4/vc4_draw.c | 2 + src/gallium/drivers/vc4/vc4_program.c | 284 ++++++++++++++++++++------ 3 files changed, 232 insertions(+), 65 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index dedd98b4e5b..8258d3046c0 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -72,6 +72,9 @@ struct vc4_shader_uniform_info { struct vc4_shader_state { struct pipe_shader_state base; +}; + +struct vc4_compiled_shader { struct vc4_bo *bo; struct vc4_shader_uniform_info uniforms[2]; @@ -80,7 +83,8 @@ struct vc4_shader_state { }; struct vc4_program_stateobj { - struct vc4_shader_state *vs, *fs; + struct vc4_shader_state *bind_vs, *bind_fs; + struct vc4_compiled_shader *vs, *fs; uint32_t dirty; uint8_t num_exports; /* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index @@ -138,6 +142,8 @@ struct vc4_context { struct primconvert_context *primconvert; + struct util_hash_table *fs_cache, *vs_cache; + /** @{ Current pipeline state objects */ struct pipe_scissor_state scissor; struct pipe_blend_state *blend; @@ -188,7 +194,7 @@ void vc4_simulator_flush(struct vc4_context *vc4, void *vc4_simulator_alloc(struct vc4_screen *screen, uint32_t size); void vc4_get_uniform_bo(struct vc4_context *vc4, - struct vc4_shader_state *shader, + struct vc4_compiled_shader *shader, struct vc4_constbuf_stateobj *cb, int shader_index, struct vc4_bo **out_bo, uint32_t *out_offset); @@ -196,5 +202,6 @@ void vc4_get_uniform_bo(struct vc4_context *vc4, void vc4_flush(struct pipe_context *pctx); void vc4_emit_state(struct pipe_context *pctx); void vc4_generate_code(struct qcompile *c); +void vc4_update_compiled_shaders(struct 
vc4_context *vc4); #endif /* VC4_CONTEXT_H */ diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 1d7956d1d4f..10b5deb560e 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -98,6 +98,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) "tile_state"); } + vc4_update_compiled_shaders(vc4); + vc4->needs_flush = true; // Tile state data is 48 bytes per tile, I think it can be thrown away diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 8d71212a69f..bc66ecc3446 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -25,6 +25,9 @@ #include #include #include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_hash_table.h" +#include "util/u_hash.h" #include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_dump.h" @@ -43,11 +46,29 @@ struct tgsi_to_qir { struct qreg *consts; uint32_t num_consts; + struct vc4_shader_state *shader_state; + struct vc4_fs_key *fs_key; + struct vc4_vs_key *vs_key; + uint32_t *uniform_data; enum quniform_contents *uniform_contents; uint32_t num_uniforms; }; +struct vc4_key { + struct vc4_shader_state *shader_state; +}; + +struct vc4_fs_key { + struct vc4_key base; + enum pipe_format color_format; +}; + +struct vc4_vs_key { + struct vc4_key base; + enum pipe_format attr_formats[8]; +}; + static struct qreg get_temp_for_uniform(struct tgsi_to_qir *trans, uint32_t uniform) { @@ -323,7 +344,7 @@ parse_tgsi_immediate(struct tgsi_to_qir *trans, struct tgsi_full_immediate *imm) } static void -emit_frag_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so) +emit_frag_init(struct tgsi_to_qir *trans) { /* XXX: lols */ for (int i = 0; i < 4; i++) { @@ -333,7 +354,7 @@ emit_frag_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so) } static void -emit_vert_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so) 
+emit_vert_init(struct tgsi_to_qir *trans) { struct qcompile *c = trans->c; @@ -346,7 +367,7 @@ emit_vert_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so) } static void -emit_coord_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so) +emit_coord_init(struct tgsi_to_qir *trans) { struct qcompile *c = trans->c; @@ -359,16 +380,27 @@ emit_coord_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so) } static void -emit_frag_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so) +emit_frag_end(struct tgsi_to_qir *trans) { struct qcompile *c = trans->c; struct qreg t = qir_get_temp(c); + + const struct util_format_description *format_desc = + util_format_description(trans->fs_key->color_format); + + struct qreg swizzled_outputs[4] = { + trans->outputs[format_desc->swizzle[0]], + trans->outputs[format_desc->swizzle[1]], + trans->outputs[format_desc->swizzle[2]], + trans->outputs[format_desc->swizzle[3]], + }; + qir_emit(c, qir_inst4(QOP_PACK_COLORS, t, - trans->outputs[0], - trans->outputs[1], - trans->outputs[2], - trans->outputs[3])); + swizzled_outputs[0], + swizzled_outputs[1], + swizzled_outputs[2], + swizzled_outputs[3])); qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef, t, c->undef)); } @@ -409,7 +441,7 @@ emit_1_wc_write(struct tgsi_to_qir *trans) } static void -emit_vert_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so) +emit_vert_end(struct tgsi_to_qir *trans) { emit_scaled_viewport_write(trans); emit_zs_write(trans); @@ -418,7 +450,7 @@ emit_vert_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so) } static void -emit_coord_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so) +emit_coord_end(struct tgsi_to_qir *trans) { struct qcompile *c = trans->c; @@ -431,7 +463,8 @@ emit_coord_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so) } static struct tgsi_to_qir * -vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage) +vc4_shader_tgsi_to_qir(struct vc4_compiled_shader *shader, enum 
qstage stage, + struct vc4_key *key) { struct tgsi_to_qir *trans = CALLOC_STRUCT(tgsi_to_qir); struct qcompile *c; @@ -451,24 +484,28 @@ vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage) trans->uniform_data = calloc(sizeof(uint32_t), 1024); trans->uniform_contents = calloc(sizeof(enum quniform_contents), 1024); + trans->shader_state = key->shader_state; trans->c = c; - ret = tgsi_parse_init(&trans->parser, so->base.tokens); + ret = tgsi_parse_init(&trans->parser, trans->shader_state->base.tokens); assert(ret == TGSI_PARSE_OK); if (vc4_debug & VC4_DEBUG_TGSI) { fprintf(stderr, "TGSI:\n"); - tgsi_dump(so->base.tokens, 0); + tgsi_dump(trans->shader_state->base.tokens, 0); } switch (stage) { case QSTAGE_FRAG: - emit_frag_init(trans, so); + trans->fs_key = (struct vc4_fs_key *)key; + emit_frag_init(trans); break; case QSTAGE_VERT: - emit_vert_init(trans, so); + trans->vs_key = (struct vc4_vs_key *)key; + emit_vert_init(trans); break; case QSTAGE_COORD: - emit_coord_init(trans, so); + trans->vs_key = (struct vc4_vs_key *)key; + emit_coord_init(trans); break; } @@ -490,13 +527,13 @@ vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage) switch (stage) { case QSTAGE_FRAG: - emit_frag_end(trans, so); + emit_frag_end(trans); break; case QSTAGE_VERT: - emit_vert_end(trans, so); + emit_vert_end(trans); break; case QSTAGE_COORD: - emit_coord_end(trans, so); + emit_coord_end(trans); break; } @@ -520,7 +557,7 @@ vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage) return trans; } -static struct vc4_shader_state * +static void * vc4_shader_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { @@ -534,12 +571,12 @@ vc4_shader_state_create(struct pipe_context *pctx, } static void -copy_uniform_state_to_shader(struct vc4_shader_state *so, +copy_uniform_state_to_shader(struct vc4_compiled_shader *shader, int shader_index, struct tgsi_to_qir *trans) { int count = trans->num_uniforms; - struct 
vc4_shader_uniform_info *uinfo = &so->uniforms[shader_index]; + struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index]; uinfo->count = count; uinfo->data = malloc(count * sizeof(*uinfo->data)); @@ -550,71 +587,187 @@ copy_uniform_state_to_shader(struct vc4_shader_state *so, count * sizeof(*uinfo->contents)); } -static void * -vc4_fs_state_create(struct pipe_context *pctx, - const struct pipe_shader_state *cso) +static void +vc4_fs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader, + struct vc4_fs_key *key) { - struct vc4_context *vc4 = vc4_context(pctx); - struct vc4_shader_state *so = vc4_shader_state_create(pctx, cso); - if (!so) - return NULL; - - struct tgsi_to_qir *trans = vc4_shader_tgsi_to_qir(so, QSTAGE_FRAG); - copy_uniform_state_to_shader(so, 0, trans); + struct tgsi_to_qir *trans = vc4_shader_tgsi_to_qir(shader, QSTAGE_FRAG, + &key->base); + copy_uniform_state_to_shader(shader, 0, trans); - so->bo = vc4_bo_alloc_mem(vc4->screen, trans->c->qpu_insts, - trans->c->num_qpu_insts * sizeof(uint64_t), - "fs_code"); + shader->bo = vc4_bo_alloc_mem(vc4->screen, trans->c->qpu_insts, + trans->c->num_qpu_insts * sizeof(uint64_t), + "fs_code"); qir_compile_destroy(trans->c); free(trans); - - return so; } -static void * -vc4_vs_state_create(struct pipe_context *pctx, - const struct pipe_shader_state *cso) +static void +vc4_vs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader, + struct vc4_vs_key *key) { - struct vc4_context *vc4 = vc4_context(pctx); - struct vc4_shader_state *so = vc4_shader_state_create(pctx, cso); - if (!so) - return NULL; - - struct tgsi_to_qir *vs_trans = vc4_shader_tgsi_to_qir(so, QSTAGE_VERT); - copy_uniform_state_to_shader(so, 0, vs_trans); + struct tgsi_to_qir *vs_trans = vc4_shader_tgsi_to_qir(shader, + QSTAGE_VERT, + &key->base); + copy_uniform_state_to_shader(shader, 0, vs_trans); - struct tgsi_to_qir *cs_trans = vc4_shader_tgsi_to_qir(so, QSTAGE_COORD); - copy_uniform_state_to_shader(so, 
1, cs_trans); + struct tgsi_to_qir *cs_trans = vc4_shader_tgsi_to_qir(shader, + QSTAGE_COORD, + &key->base); + copy_uniform_state_to_shader(shader, 1, cs_trans); uint32_t vs_size = vs_trans->c->num_qpu_insts * sizeof(uint64_t); uint32_t cs_size = cs_trans->c->num_qpu_insts * sizeof(uint64_t); - so->coord_shader_offset = vs_size; /* XXX: alignment? */ - so->bo = vc4_bo_alloc(vc4->screen, - so->coord_shader_offset + cs_size, - "vs_code"); + shader->coord_shader_offset = vs_size; /* XXX: alignment? */ + shader->bo = vc4_bo_alloc(vc4->screen, + shader->coord_shader_offset + cs_size, + "vs_code"); - void *map = vc4_bo_map(so->bo); + void *map = vc4_bo_map(shader->bo); memcpy(map, vs_trans->c->qpu_insts, vs_size); - memcpy(map + so->coord_shader_offset, cs_trans->c->qpu_insts, cs_size); + memcpy(map + shader->coord_shader_offset, + cs_trans->c->qpu_insts, cs_size); qir_compile_destroy(vs_trans->c); qir_compile_destroy(cs_trans->c); +} - return so; +static void +vc4_update_compiled_fs(struct vc4_context *vc4) +{ + struct vc4_fs_key local_key; + struct vc4_fs_key *key = &local_key; + + memset(key, 0, sizeof(*key)); + key->base.shader_state = vc4->prog.bind_fs; + + if (vc4->framebuffer.cbufs[0]) + key->color_format = vc4->framebuffer.cbufs[0]->format; + + vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key); + if (vc4->prog.fs) + return; + + key = malloc(sizeof(*key)); + memcpy(key, &local_key, sizeof(*key)); + + struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader); + vc4_fs_compile(vc4, shader, key); + util_hash_table_set(vc4->fs_cache, key, shader); + + vc4->prog.fs = shader; +} + +static void +vc4_update_compiled_vs(struct vc4_context *vc4) +{ + struct vc4_vs_key local_key; + struct vc4_vs_key *key = &local_key; + + memset(key, 0, sizeof(*key)); + key->base.shader_state = vc4->prog.bind_vs; + + vc4->prog.vs = util_hash_table_get(vc4->vs_cache, key); + if (vc4->prog.vs) + return; + + key = malloc(sizeof(*key)); + memcpy(key, &local_key, sizeof(*key)); 
+ + struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader); + vc4_vs_compile(vc4, shader, key); + util_hash_table_set(vc4->vs_cache, key, shader); + + vc4->prog.vs = shader; +} + +void +vc4_update_compiled_shaders(struct vc4_context *vc4) +{ + vc4_update_compiled_fs(vc4); + vc4_update_compiled_vs(vc4); +} + +static unsigned +fs_cache_hash(void *key) +{ + return util_hash_crc32(key, sizeof(struct vc4_fs_key)); +} + +static unsigned +vs_cache_hash(void *key) +{ + return util_hash_crc32(key, sizeof(struct vc4_vs_key)); +} + +static int +fs_cache_compare(void *key1, void *key2) +{ + return memcmp(key1, key2, sizeof(struct vc4_fs_key)); +} + +static int +vs_cache_compare(void *key1, void *key2) +{ + return memcmp(key1, key2, sizeof(struct vc4_vs_key)); +} + +struct delete_state { + struct vc4_context *vc4; + struct vc4_shader_state *shader_state; +}; + +static enum pipe_error +fs_delete_from_cache(void *in_key, void *in_value, void *data) +{ + struct delete_state *del = data; + struct vc4_fs_key *key = in_key; + struct vc4_compiled_shader *shader = in_value; + + if (key->base.shader_state == del->shader_state) { /* compare against the shader being deleted, not the delete_state wrapper */ + util_hash_table_remove(del->vc4->fs_cache, key); + vc4_bo_unreference(&shader->bo); + free(shader); + } + + return 0; +} + +static enum pipe_error +vs_delete_from_cache(void *in_key, void *in_value, void *data) +{ + struct delete_state *del = data; + struct vc4_vs_key *key = in_key; + struct vc4_compiled_shader *shader = in_value; + + if (key->base.shader_state == del->shader_state) { /* compare against the shader being deleted, not the delete_state wrapper */ + util_hash_table_remove(del->vc4->vs_cache, key); + vc4_bo_unreference(&shader->bo); + free(shader); + } + + return 0; } static void vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso) { - struct pipe_shader_state *so = hwcso; + struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_shader_state *so = hwcso; + struct delete_state del; + + del.vc4 = vc4; + del.shader_state = so; + util_hash_table_foreach(vc4->fs_cache, fs_delete_from_cache, &del); + 
util_hash_table_foreach(vc4->vs_cache, vs_delete_from_cache, &del); - free((void *)so->tokens); + free((void *)so->base.tokens); free(so); } void -vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_shader_state *shader, +vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader, struct vc4_constbuf_stateobj *cb, int shader_index, struct vc4_bo **out_bo, uint32_t *out_offset) @@ -653,7 +806,7 @@ static void vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso) { struct vc4_context *vc4 = vc4_context(pctx); - vc4->prog.fs = hwcso; + vc4->prog.bind_fs = hwcso; vc4->prog.dirty |= VC4_SHADER_DIRTY_FP; vc4->dirty |= VC4_DIRTY_PROG; } @@ -662,7 +815,7 @@ static void vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso) { struct vc4_context *vc4 = vc4_context(pctx); - vc4->prog.vs = hwcso; + vc4->prog.bind_vs = hwcso; vc4->prog.dirty |= VC4_SHADER_DIRTY_VP; vc4->dirty |= VC4_DIRTY_PROG; } @@ -670,12 +823,17 @@ vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso) void vc4_program_init(struct pipe_context *pctx) { - pctx->create_vs_state = vc4_vs_state_create; + struct vc4_context *vc4 = vc4_context(pctx); + + pctx->create_vs_state = vc4_shader_state_create; pctx->delete_vs_state = vc4_shader_state_delete; - pctx->create_fs_state = vc4_fs_state_create; + pctx->create_fs_state = vc4_shader_state_create; pctx->delete_fs_state = vc4_shader_state_delete; pctx->bind_fs_state = vc4_fp_state_bind; pctx->bind_vs_state = vc4_vp_state_bind; + + vc4->fs_cache = util_hash_table_create(fs_cache_hash, fs_cache_compare); + vc4->vs_cache = util_hash_table_create(vs_cache_hash, vs_cache_compare); } -- 2.30.2