From d1e40b3d40b2e90ad4f275565f1ae27fe6f964cc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Michel=20D=C3=A4nzer?= Date: Thu, 23 Aug 2012 17:10:37 +0200 Subject: [PATCH] radeonsi: Maintain cache of pixel shader variants according to context state. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Mostly inspired by r600g commit 4acf71f01ea1edb253cd38cc059d4af1a2a40bf4 ('r600g: cache shader variants instead of rebuilding v3'). Signed-off-by: Michel Dänzer Reviewed-by: Christian König Reviewed-by: Alex Deucher --- src/gallium/drivers/radeonsi/radeonsi_pipe.h | 4 +- .../drivers/radeonsi/radeonsi_shader.c | 14 +- .../drivers/radeonsi/radeonsi_shader.h | 24 ++- src/gallium/drivers/radeonsi/si_state.c | 185 +++++++++++++++--- src/gallium/drivers/radeonsi/si_state.h | 5 + src/gallium/drivers/radeonsi/si_state_draw.c | 37 ++-- 6 files changed, 210 insertions(+), 59 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index bec2939d3f7..989bb49cbee 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -126,8 +126,8 @@ struct r600_context { unsigned pa_cl_vs_out_cntl; /* for saving when using blitter */ struct pipe_stencil_ref stencil_ref; - struct si_pipe_shader *ps_shader; - struct si_pipe_shader *vs_shader; + struct si_pipe_shader_selector *ps_shader; + struct si_pipe_shader_selector *vs_shader; struct pipe_query *current_render_cond; unsigned current_render_cond_mode; struct pipe_query *saved_render_cond; diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index 0f2aaef8175..671eda4a381 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -282,7 +282,8 @@ static void declare_input_fs( switch (decl->Interp.Interpolate) { case TGSI_INTERPOLATE_COLOR: /* XXX: Flat shading hangs the GPU */ - if 
(si_shader_ctx->rctx->queued.named.rasterizer->flatshade) { + if (si_shader_ctx->rctx->queued.named.rasterizer && + si_shader_ctx->rctx->queued.named.rasterizer->flatshade) { #if 0 intr_name = "llvm.SI.fs.interp.constant"; #else @@ -617,6 +618,7 @@ int si_pipe_shader_create( struct si_pipe_shader *shader) { struct r600_context *rctx = (struct r600_context*)ctx; + struct si_pipe_shader_selector *sel = shader->selector; struct si_shader_context si_shader_ctx; struct tgsi_shader_info shader_info; struct lp_build_tgsi_context * bld_base; @@ -633,7 +635,7 @@ int si_pipe_shader_create( radeon_llvm_context_init(&si_shader_ctx.radeon_bld); bld_base = &si_shader_ctx.radeon_bld.soa.bld_base; - tgsi_scan_shader(shader->tokens, &shader_info); + tgsi_scan_shader(sel->tokens, &shader_info); bld_base->info = &shader_info; bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; bld_base->emit_epilogue = si_llvm_emit_epilogue; @@ -642,7 +644,7 @@ int si_pipe_shader_create( bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action; si_shader_ctx.radeon_bld.load_input = declare_input; - si_shader_ctx.tokens = shader->tokens; + si_shader_ctx.tokens = sel->tokens; tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens); si_shader_ctx.shader = shader; si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor; @@ -653,10 +655,10 @@ int si_pipe_shader_create( /* Dump TGSI code before doing TGSI->LLVM conversion in case the * conversion fails. 
*/ if (dump) { - tgsi_dump(shader->tokens, 0); + tgsi_dump(sel->tokens, 0); } - if (!lp_build_tgsi_llvm(bld_base, shader->tokens)) { + if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) { fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n"); return -EINVAL; } @@ -710,6 +712,4 @@ int si_pipe_shader_create( void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader) { si_resource_reference(&shader->bo, NULL); - - memset(&shader->shader,0,sizeof(struct si_shader)); } diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h index d44ee9b128b..aa2888ccf30 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.h +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h @@ -37,6 +37,25 @@ struct si_shader_io { bool centroid; }; +struct si_pipe_shader; + +struct si_pipe_shader_selector { + struct si_pipe_shader *current; + + struct tgsi_token *tokens; + struct pipe_stream_output_info so; + + unsigned num_shaders; + + /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ + unsigned type; + + /* 1 when the shader contains + * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, otherwise it's 0. 
+ * Used to determine whether we need to include nr_cbufs in the key */ + unsigned fs_write_all; +}; + struct si_shader { unsigned ninput; struct si_shader_io input[32]; @@ -50,16 +69,17 @@ struct si_shader { }; struct si_pipe_shader { + struct si_pipe_shader_selector *selector; + struct si_pipe_shader *next_variant; struct si_shader shader; struct si_pm4_state *pm4; struct si_resource *bo; - struct tgsi_token *tokens; unsigned num_sgprs; unsigned num_vgprs; unsigned spi_ps_input_ena; unsigned sprite_coord_enable; - struct pipe_stream_output_info so; unsigned so_strides[4]; + unsigned key; }; /* radeonsi_shader.c */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 5df22dd5f3a..5c2e7434ba3 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1717,77 +1717,200 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * shaders */ +/* Compute the key for the hw shader variant */ +static INLINE unsigned si_shader_selector_key(struct pipe_context *ctx, + struct si_pipe_shader_selector *sel) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + unsigned key = 0; + + if (sel->type == PIPE_SHADER_FRAGMENT) { + if (sel->fs_write_all) + key |= rctx->framebuffer.nr_cbufs; + /*if (rctx->queued.named.rasterizer) + key |= rctx->queued.named.rasterizer->flatshade << 4;*/ + /*key |== rctx->two_side << 5;*/ + } + + return key; +} + +/* Select the hw shader variant depending on the current state. + * (*dirty) is set to 1 if current variant was changed */ +int si_shader_select(struct pipe_context *ctx, + struct si_pipe_shader_selector *sel, + unsigned *dirty) +{ + unsigned key; + struct si_pipe_shader * shader = NULL; + int r; + + key = si_shader_selector_key(ctx, sel); + + /* Check if we don't need to change anything. + * This path is also used for most shaders that don't need multiple + * variants, it will cost just a computation of the key and this + * test. 
*/ + if (likely(sel->current && sel->current->key == key)) { + return 0; + } + + /* lookup if we have other variants in the list */ + if (sel->num_shaders > 1) { + struct si_pipe_shader *p = sel->current, *c = p->next_variant; + + while (c && c->key != key) { + p = c; + c = c->next_variant; + } + + if (c) { + p->next_variant = c->next_variant; + shader = c; + } + } + + if (unlikely(!shader)) { + shader = CALLOC(1, sizeof(struct si_pipe_shader)); + shader->selector = sel; + + r = si_pipe_shader_create(ctx, shader); + if (unlikely(r)) { + R600_ERR("Failed to build shader variant (type=%u, key=%u) %d\n", + sel->type, key, r); + sel->current = NULL; + return r; + } + + /* We don't know the value of fs_write_all property until we built + * at least one variant, so we may need to recompute the key (include + * rctx->framebuffer.nr_cbufs) after building first variant. */ + if (sel->type == PIPE_SHADER_FRAGMENT && + sel->num_shaders == 0 && + shader->shader.fs_write_all) { + sel->fs_write_all = 1; + key = si_shader_selector_key(ctx, sel); + } + + shader->key = key; + sel->num_shaders++; + } + + if (dirty) + *dirty = 1; + + shader->next_variant = sel->current; + sel->current = shader; + + return 0; +} + static void *si_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) + const struct pipe_shader_state *state, + unsigned pipe_shader_type) { - struct si_pipe_shader *shader = CALLOC_STRUCT(si_pipe_shader); + struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector); + int r; - shader->tokens = tgsi_dup_tokens(state->tokens); - shader->so = state->stream_output; + sel->type = pipe_shader_type; + sel->tokens = tgsi_dup_tokens(state->tokens); + sel->so = state->stream_output; + + r = si_shader_select(ctx, sel, NULL); + if (r) { + free(sel); + return NULL; + } + + return sel; +} + +static void *si_create_fs_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + return si_create_shader_state(ctx, state, 
PIPE_SHADER_FRAGMENT); +} - return shader; +static void *si_create_vs_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX); } static void si_bind_vs_shader(struct pipe_context *ctx, void *state) { struct r600_context *rctx = (struct r600_context *)ctx; - struct si_pipe_shader *shader = state; + struct si_pipe_shader_selector *sel = state; - if (rctx->vs_shader == state) + if (rctx->vs_shader == sel) return; rctx->shader_dirty = true; - rctx->vs_shader = shader; + rctx->vs_shader = sel; - if (shader) { - si_pm4_bind_state(rctx, vs, shader->pm4); - } + if (sel && sel->current) + si_pm4_bind_state(rctx, vs, sel->current->pm4); + else + si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4); } static void si_bind_ps_shader(struct pipe_context *ctx, void *state) { struct r600_context *rctx = (struct r600_context *)ctx; - struct si_pipe_shader *shader = state; + struct si_pipe_shader_selector *sel = state; - if (rctx->ps_shader == state) + if (rctx->ps_shader == sel) return; rctx->shader_dirty = true; - rctx->ps_shader = shader; + rctx->ps_shader = sel; - if (shader) { - si_pm4_bind_state(rctx, ps, shader->pm4); - } + if (sel && sel->current) + si_pm4_bind_state(rctx, ps, sel->current->pm4); + else + si_pm4_bind_state(rctx, ps, rctx->dummy_pixel_shader->pm4); } +static void si_delete_shader_selector(struct pipe_context *ctx, + struct si_pipe_shader_selector *sel) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct si_pipe_shader *p = sel->current, *c; + + while (p) { + c = p->next_variant; + si_pm4_delete_state(rctx, vs, p->pm4); + si_pipe_shader_destroy(ctx, p); + free(p); + p = c; + } + + free(sel->tokens); + free(sel); + } + static void si_delete_vs_shader(struct pipe_context *ctx, void *state) { struct r600_context *rctx = (struct r600_context *)ctx; - struct si_pipe_shader *shader = (struct si_pipe_shader *)state; + struct si_pipe_shader_selector *sel = 
(struct si_pipe_shader_selector *)state; - if (rctx->vs_shader == shader) { + if (rctx->vs_shader == sel) { rctx->vs_shader = NULL; } - si_pm4_delete_state(rctx, vs, shader->pm4); - free(shader->tokens); - si_pipe_shader_destroy(ctx, shader); - free(shader); + si_delete_shader_selector(ctx, sel); } static void si_delete_ps_shader(struct pipe_context *ctx, void *state) { struct r600_context *rctx = (struct r600_context *)ctx; - struct si_pipe_shader *shader = (struct si_pipe_shader *)state; + struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; - if (rctx->ps_shader == shader) { + if (rctx->ps_shader == sel) { rctx->ps_shader = NULL; } - si_pm4_delete_state(rctx, ps, shader->pm4); - free(shader->tokens); - si_pipe_shader_destroy(ctx, shader); - free(shader); + si_delete_shader_selector(ctx, sel); } /* @@ -2269,8 +2392,8 @@ void si_init_state_functions(struct r600_context *rctx) rctx->context.set_framebuffer_state = si_set_framebuffer_state; - rctx->context.create_vs_state = si_create_shader_state; - rctx->context.create_fs_state = si_create_shader_state; + rctx->context.create_vs_state = si_create_vs_state; + rctx->context.create_fs_state = si_create_fs_state; rctx->context.bind_vs_state = si_bind_vs_shader; rctx->context.bind_fs_state = si_bind_ps_shader; rctx->context.delete_vs_state = si_delete_vs_shader; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f53ecb720ff..d59624cd8e6 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -126,11 +126,16 @@ union si_state { } while(0) /* si_state.c */ +struct si_pipe_shader_selector; + bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, unsigned usage); +int si_shader_select(struct pipe_context *ctx, + struct si_pipe_shader_selector *sel, + unsigned *dirty); void si_init_state_functions(struct r600_context *rctx); 
void si_init_config(struct r600_context *rctx); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index fda8b0bd6a5..95821dc5f5c 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -317,8 +317,8 @@ static void si_update_alpha_ref(struct r600_context *rctx) static void si_update_spi_map(struct r600_context *rctx) { - struct si_shader *ps = &rctx->ps_shader->shader; - struct si_shader *vs = &rctx->vs_shader->shader; + struct si_shader *ps = &rctx->ps_shader->current->shader; + struct si_shader *vs = &rctx->vs_shader->current->shader; struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); unsigned i, j, tmp; @@ -362,36 +362,39 @@ static void si_update_spi_map(struct r600_context *rctx) static void si_update_derived_state(struct r600_context *rctx) { struct pipe_context * ctx = (struct pipe_context*)rctx; + unsigned ps_dirty = 0; if (!rctx->blitter->running) { if (rctx->have_depth_fb || rctx->have_depth_texture) si_flush_depth_textures(rctx); } - if ((rctx->ps_shader->shader.fs_write_all && - (rctx->ps_shader->shader.nr_cbufs != rctx->framebuffer.nr_cbufs)) || - (rctx->sprite_coord_enable && - (rctx->ps_shader->sprite_coord_enable != rctx->sprite_coord_enable))) { - si_pipe_shader_destroy(&rctx->context, rctx->ps_shader); - } + si_shader_select(ctx, rctx->ps_shader, &ps_dirty); if (rctx->alpha_ref_dirty) { si_update_alpha_ref(rctx); } - if (!rctx->vs_shader->bo) { - si_pipe_shader_vs(ctx, rctx->vs_shader); + if (!rctx->vs_shader->current->pm4) { + si_pipe_shader_vs(ctx, rctx->vs_shader->current); } - if (!rctx->ps_shader->bo) { - si_pipe_shader_ps(ctx, rctx->ps_shader); + if (!rctx->ps_shader->current->pm4) { + si_pipe_shader_ps(ctx, rctx->ps_shader->current); + ps_dirty = 0; } - if (!rctx->ps_shader->bo) { - if (!rctx->dummy_pixel_shader->bo) + if (!rctx->ps_shader->current->bo) { + if (!rctx->dummy_pixel_shader->pm4) si_pipe_shader_ps(ctx, 
rctx->dummy_pixel_shader); - - if (rctx->dummy_pixel_shader->pm4) + else si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4); + + ps_dirty = 0; + } + + if (ps_dirty) { + si_pm4_bind_state(rctx, ps, rctx->ps_shader->current->pm4); + rctx->shader_dirty = true; } if (rctx->shader_dirty) { @@ -545,7 +548,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_context_draw_opaque_count(rctx, (struct r600_so_target*)info->count_from_stream_output); } - rctx->vs_shader_so_strides = rctx->vs_shader->so_strides; + rctx->vs_shader_so_strides = rctx->vs_shader->current->so_strides; if (!si_update_draw_info_state(rctx, info)) return; -- 2.30.2