From: Vadim Girlin Date: Mon, 11 Jun 2012 09:11:47 +0000 (+0400) Subject: r600g: cache shader variants instead of rebuilding v3 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4acf71f01ea1edb253cd38cc059d4af1a2a40bf4;p=mesa.git r600g: cache shader variants instead of rebuilding v3 Shader variants are stored in a list; the lookup key is based on the states that require different hw shaders - currently rctx->two_side (all GPUs) and rctx->nr_cbufs (evergreen/cayman, when the writes_all property is set). v2: - use a simple list instead of a keymap, as suggested by Marek on IRC - call r600_adjust_gprs from r600_bind_vs_shader for r6xx/r7xx (r600_shader_select isn't used for vertex shaders currently) v3: - fix call to r600_adjust_gprs - do it after updating current shader Improves performance for some apps, e.g. FlightGear - see https://bugs.freedesktop.org/show_bug.cgi?id=50360 Signed-off-by: Vadim Girlin --- diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index d6ed20d6cf9..4b82b06f177 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1848,12 +1848,12 @@ void evergreen_init_state_functions(struct r600_context *rctx) rctx->context.create_blend_state = evergreen_create_blend_state; rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state; - rctx->context.create_fs_state = r600_create_shader_state; + rctx->context.create_fs_state = r600_create_shader_state_ps; rctx->context.create_rasterizer_state = evergreen_create_rs_state; rctx->context.create_sampler_state = evergreen_create_sampler_state; rctx->context.create_sampler_view = evergreen_create_sampler_view; rctx->context.create_vertex_elements_state = r600_create_vertex_elements; - rctx->context.create_vs_state = r600_create_shader_state; + rctx->context.create_vs_state = r600_create_shader_state_vs; rctx->context.bind_blend_state = r600_bind_blend_state; 
rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state; rctx->context.bind_fragment_sampler_states = evergreen_bind_ps_sampler; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 9806e5b261d..5d194e325f7 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -185,18 +185,38 @@ struct r600_vertex_element struct r600_pipe_state rstate; }; +struct r600_pipe_shader; + +struct r600_pipe_shader_selector { + struct r600_pipe_shader *current; + + struct tgsi_token *tokens; + struct pipe_stream_output_info so; + + unsigned num_shaders; + + /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ + unsigned type; + + /* 1 on evergreen+ when the shader contains + * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, otherwise it's 0. + * Used to determine whether we need to include nr_cbufs in the key */ + unsigned eg_fs_write_all; +}; + struct r600_pipe_shader { + struct r600_pipe_shader_selector *selector; + struct r600_pipe_shader *next_variant; struct r600_shader shader; struct r600_pipe_state rstate; struct r600_resource *bo; struct r600_resource *bo_fetch; struct r600_vertex_element vertex_elements; - struct tgsi_token *tokens; unsigned sprite_coord_enable; unsigned flatshade; unsigned pa_cl_vs_out_cntl; - unsigned ps_cb_shader_mask; - struct pipe_stream_output_info so; + unsigned ps_cb_shader_mask; + unsigned key; }; struct r600_pipe_sampler_state { @@ -272,8 +292,8 @@ struct r600_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_clip_state clip; - struct r600_pipe_shader *ps_shader; - struct r600_pipe_shader *vs_shader; + struct r600_pipe_shader_selector *ps_shader; + struct r600_pipe_shader_selector *vs_shader; struct r600_pipe_compute *cs_shader; struct r600_pipe_rasterizer *rasterizer; struct r600_pipe_state vgt; @@ -436,8 +456,6 @@ int r600_compute_shader_create(struct pipe_context * ctx, LLVMModuleRef mod, struct r600_bytecode * bytecode); #endif 
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); -int r600_find_vs_semantic_index(struct r600_shader *vs, - struct r600_shader *ps, int id); /* r600_state.c */ void r600_set_scissor_state(struct r600_context *rctx, @@ -497,8 +515,10 @@ void r600_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state); void r600_delete_state(struct pipe_context *ctx, void *state); void r600_bind_vertex_elements(struct pipe_context *ctx, void *state); -void *r600_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state); +void *r600_create_shader_state_ps(struct pipe_context *ctx, + const struct pipe_shader_state *state); +void *r600_create_shader_state_vs(struct pipe_context *ctx, + const struct pipe_shader_state *state); void r600_bind_ps_shader(struct pipe_context *ctx, void *state); void r600_bind_vs_shader(struct pipe_context *ctx, void *state); void r600_delete_ps_shader(struct pipe_context *ctx, void *state); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d294084de9c..f690c10fd7c 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -109,6 +109,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s { static int dump_shaders = -1; struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_pipe_shader_selector *sel = shader->selector; int r; /* Would like some magic "get_bool_option_once" routine. 
@@ -118,16 +119,16 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s if (dump_shaders) { fprintf(stderr, "--------------------------------------------------------------\n"); - tgsi_dump(shader->tokens, 0); + tgsi_dump(sel->tokens, 0); - if (shader->so.num_outputs) { + if (sel->so.num_outputs) { unsigned i; fprintf(stderr, "STREAMOUT\n"); - for (i = 0; i < shader->so.num_outputs; i++) { - unsigned mask = ((1 << shader->so.output[i].num_components) - 1) << - shader->so.output[i].start_component; + for (i = 0; i < sel->so.num_outputs; i++) { + unsigned mask = ((1 << sel->so.output[i].num_components) - 1) << + sel->so.output[i].start_component; fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i, - shader->so.output[i].output_buffer, shader->so.output[i].register_index, + sel->so.output[i].output_buffer, sel->so.output[i].register_index, mask & 1 ? "x" : "_", (mask >> 1) & 1 ? "y" : "_", (mask >> 2) & 1 ? "z" : "_", @@ -156,8 +157,6 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader { pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); r600_bytecode_clear(&shader->shader.bc); - - memset(&shader->shader,0,sizeof(struct r600_shader)); } /* @@ -1118,8 +1117,8 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader) { struct r600_shader *shader = &pipeshader->shader; - struct tgsi_token *tokens = pipeshader->tokens; - struct pipe_stream_output_info so = pipeshader->so; + struct tgsi_token *tokens = pipeshader->selector->tokens; + struct pipe_stream_output_info so = pipeshader->selector->so; struct tgsi_full_immediate *immediate; struct tgsi_full_property *property; struct r600_shader_ctx ctx; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 3ac9b8d396b..295453a5aef 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ 
b/src/gallium/drivers/r600/r600_state.c @@ -1810,12 +1810,12 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.create_blend_state = r600_create_blend_state; rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; - rctx->context.create_fs_state = r600_create_shader_state; + rctx->context.create_fs_state = r600_create_shader_state_ps; rctx->context.create_rasterizer_state = r600_create_rs_state; rctx->context.create_sampler_state = r600_create_sampler_state; rctx->context.create_sampler_view = r600_create_sampler_view; rctx->context.create_vertex_elements_state = r600_create_vertex_elements; - rctx->context.create_vs_state = r600_create_shader_state; + rctx->context.create_vs_state = r600_create_shader_state_vs; rctx->context.bind_blend_state = r600_bind_blend_state; rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state; rctx->context.bind_fragment_sampler_states = r600_bind_ps_samplers; @@ -1851,6 +1851,7 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.set_stream_output_targets = r600_set_so_targets; } +/* Adjust GPR allocation on R6xx/R7xx */ void r600_adjust_gprs(struct r600_context *rctx) { struct r600_pipe_state rstate; @@ -1859,22 +1860,22 @@ void r600_adjust_gprs(struct r600_context *rctx) unsigned tmp; int diff; - if (rctx->chip_class >= EVERGREEN) - return; - - if (!rctx->ps_shader || !rctx->vs_shader) - return; + /* XXX: Following call moved from r600_bind_[ps|vs]_shader, + * it seems eg+ doesn't need it, r6xx/7xx probably need it only for + * adjusting the GPR allocation? + * Do we need this if we aren't really changing config below? 
*/ + r600_inval_shader_cache(rctx); - if (rctx->ps_shader->shader.bc.ngpr > rctx->default_ps_gprs) + if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) { - diff = rctx->ps_shader->shader.bc.ngpr - rctx->default_ps_gprs; + diff = rctx->ps_shader->current->shader.bc.ngpr - rctx->default_ps_gprs; num_vs_gprs -= diff; num_ps_gprs += diff; } - if (rctx->vs_shader->shader.bc.ngpr > rctx->default_vs_gprs) + if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs) { - diff = rctx->vs_shader->shader.bc.ngpr - rctx->default_vs_gprs; + diff = rctx->vs_shader->current->shader.bc.ngpr - rctx->default_vs_gprs; num_ps_gprs -= diff; num_vs_gprs += diff; } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index a2ea42f9c5f..879dcc50edb 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -429,39 +429,146 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, return v; } -void *r600_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) +/* Compute the key for the hw shader variant */ +static INLINE unsigned r600_shader_selector_key(struct pipe_context * ctx, + struct r600_pipe_shader_selector * sel) { - struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); + struct r600_context *rctx = (struct r600_context *)ctx; + unsigned key; + + if (sel->type == PIPE_SHADER_FRAGMENT) { + key = rctx->two_side; + if (sel->eg_fs_write_all) + key |= rctx->nr_cbufs << 1; + } else + key = 0; + + return key; +} + +/* Select the hw shader variant depending on the current state. 
+ * (*dirty) is set to 1 if current variant was changed */ +static int r600_shader_select(struct pipe_context *ctx, + struct r600_pipe_shader_selector* sel, + unsigned *dirty) +{ + unsigned key; + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_pipe_shader * shader = NULL; int r; - shader->tokens = tgsi_dup_tokens(state->tokens); - shader->so = state->stream_output; + key = r600_shader_selector_key(ctx, sel); - r = r600_pipe_shader_create(ctx, shader); - if (r) { - return NULL; + /* Check if we don't need to change anything. + * This path is also used for most shaders that don't need multiple + * variants, it will cost just a computation of the key and this + * test. */ + if (likely(sel->current && sel->current->key == key)) { + return 0; + } + + /* lookup if we have other variants in the list */ + if (sel->num_shaders > 1) { + struct r600_pipe_shader *p = sel->current, *c = p->next_variant; + + while (c && c->key != key) { + p = c; + c = c->next_variant; + } + + if (c) { + p->next_variant = c->next_variant; + shader = c; + } + } + + if (unlikely(!shader)) { + shader = CALLOC(1, sizeof(struct r600_pipe_shader)); + shader->selector = sel; + + r = r600_pipe_shader_create(ctx, shader); + if (unlikely(r)) { + R600_ERR("Failed to build shader variant (type=%u, key=%u) %d\n", + sel->type, key, r); + sel->current = NULL; + return r; + } + + /* We don't know the value of eg_fs_write_all property until we built + * at least one variant, so we may need to recompute the key (include + * rctx->nr_cbufs) after building first variant. 
*/ + if (sel->type == PIPE_SHADER_FRAGMENT && + sel->num_shaders == 0 && + rctx->chip_class >= EVERGREEN && + shader->shader.fs_write_all) { + sel->eg_fs_write_all = 1; + key = r600_shader_selector_key(ctx, sel); + } + + shader->key = key; + sel->num_shaders++; + } + + if (dirty) + *dirty = 1; + + shader->next_variant = sel->current; + sel->current = shader; + + /* Moved from r600_bind_ps_shader, different shader variants + * may use different number of GPRs, so we need to update it. */ + /* FIXME: we never did it after rebuilding the shaders, is it required? */ + if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) { + r600_adjust_gprs(rctx); } - return shader; + + return 0; +} + +static void *r600_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state, + unsigned pipe_shader_type) +{ + struct r600_pipe_shader_selector *sel = CALLOC_STRUCT(r600_pipe_shader_selector); + int r; + + sel->type = pipe_shader_type; + sel->tokens = tgsi_dup_tokens(state->tokens); + sel->so = state->stream_output; + + r = r600_shader_select(ctx, sel, NULL); + if (r) + return NULL; + + return sel; +} + +void *r600_create_shader_state_ps(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + return r600_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT); +} + +void *r600_create_shader_state_vs(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + return r600_create_shader_state(ctx, state, PIPE_SHADER_VERTEX); } void r600_bind_ps_shader(struct pipe_context *ctx, void *state) { struct r600_context *rctx = (struct r600_context *)ctx; - if (!state) { + if (!state) state = rctx->dummy_pixel_shader; - } - - rctx->ps_shader = (struct r600_pipe_shader *)state; - r600_inval_shader_cache(rctx); - r600_context_pipe_state_set(rctx, &rctx->ps_shader->rstate); + rctx->ps_shader = (struct r600_pipe_shader_selector *)state; + r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate); rctx->cb_color_control 
&= C_028808_MULTIWRITE_ENABLE; - rctx->cb_color_control |= S_028808_MULTIWRITE_ENABLE(!!rctx->ps_shader->shader.fs_write_all); + rctx->cb_color_control |= S_028808_MULTIWRITE_ENABLE(!!rctx->ps_shader->current->shader.fs_write_all); - if (rctx->ps_shader && rctx->vs_shader) { + if (rctx->chip_class < EVERGREEN && rctx->vs_shader) { r600_adjust_gprs(rctx); } } @@ -470,42 +577,53 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state) { struct r600_context *rctx = (struct r600_context *)ctx; - rctx->vs_shader = (struct r600_pipe_shader *)state; + rctx->vs_shader = (struct r600_pipe_shader_selector *)state; if (state) { - r600_inval_shader_cache(rctx); - r600_context_pipe_state_set(rctx, &rctx->vs_shader->rstate); + r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate); + + if (rctx->chip_class < EVERGREEN && rctx->ps_shader) + r600_adjust_gprs(rctx); } - if (rctx->ps_shader && rctx->vs_shader) { - r600_adjust_gprs(rctx); +} + +static void r600_delete_shader_selector(struct pipe_context *ctx, + struct r600_pipe_shader_selector *sel) +{ + struct r600_pipe_shader *p = sel->current, *c; + while (p) { + c = p->next_variant; + r600_pipe_shader_destroy(ctx, p); + free(p); + p = c; } + + free(sel->tokens); + free(sel); } + void r600_delete_ps_shader(struct pipe_context *ctx, void *state) { struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + struct r600_pipe_shader_selector *sel = (struct r600_pipe_shader_selector *)state; - if (rctx->ps_shader == shader) { + if (rctx->ps_shader == sel) { rctx->ps_shader = NULL; } - free(shader->tokens); - r600_pipe_shader_destroy(ctx, shader); - free(shader); + r600_delete_shader_selector(ctx, sel); } void r600_delete_vs_shader(struct pipe_context *ctx, void *state) { struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + struct r600_pipe_shader_selector *sel = (struct 
r600_pipe_shader_selector *)state; - if (rctx->vs_shader == shader) { + if (rctx->vs_shader == sel) { rctx->vs_shader = NULL; } - free(shader->tokens); - r600_pipe_shader_destroy(ctx, shader); - free(shader); + r600_delete_shader_selector(ctx, sel); } static void r600_update_alpha_ref(struct r600_context *rctx) @@ -661,24 +779,10 @@ void r600_set_so_targets(struct pipe_context *ctx, rctx->streamout_append_bitmask = append_bitmask; } -static int r600_shader_rebuild(struct pipe_context * ctx, struct r600_pipe_shader * shader) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - int r; - - r600_pipe_shader_destroy(ctx, shader); - r = r600_pipe_shader_create(ctx, shader); - if (r) { - return r; - } - r600_context_pipe_state_set(rctx, &shader->rstate); - - return 0; -} - static void r600_update_derived_state(struct r600_context *rctx) { struct pipe_context * ctx = (struct pipe_context*)rctx; + unsigned ps_dirty = 0; if (!rctx->blitter->running) { if (rctx->have_depth_fb || rctx->have_depth_texture) @@ -689,30 +793,29 @@ static void r600_update_derived_state(struct r600_context *rctx) r600_update_sampler_states(rctx); } - if ((rctx->ps_shader->shader.two_side != rctx->two_side) || - ((rctx->chip_class >= EVERGREEN) && rctx->ps_shader->shader.fs_write_all && - (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) { - r600_shader_rebuild(&rctx->context, rctx->ps_shader); - } + r600_shader_select(ctx, rctx->ps_shader, &ps_dirty); if (rctx->alpha_ref_dirty) { r600_update_alpha_ref(rctx); } if (rctx->ps_shader && ((rctx->sprite_coord_enable && - (rctx->ps_shader->sprite_coord_enable != rctx->sprite_coord_enable)) || - (rctx->rasterizer && rctx->rasterizer->flatshade != rctx->ps_shader->flatshade))) { + (rctx->ps_shader->current->sprite_coord_enable != rctx->sprite_coord_enable)) || + (rctx->rasterizer && rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade))) { if (rctx->chip_class >= EVERGREEN) - evergreen_pipe_shader_ps(ctx, rctx->ps_shader); + 
evergreen_pipe_shader_ps(ctx, rctx->ps_shader->current); else - r600_pipe_shader_ps(ctx, rctx->ps_shader); + r600_pipe_shader_ps(ctx, rctx->ps_shader->current); - r600_context_pipe_state_set(rctx, &rctx->ps_shader->rstate); + ps_dirty = 1; } + if (ps_dirty) + r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate); + if (rctx->dual_src_blend) - rctx->cb_shader_mask = rctx->ps_shader->ps_cb_shader_mask | rctx->fb_cb_shader_mask; + rctx->cb_shader_mask = rctx->ps_shader->current->ps_cb_shader_mask | rctx->fb_cb_shader_mask; else rctx->cb_shader_mask = rctx->fb_cb_shader_mask; } @@ -827,12 +930,12 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) if (rctx->chip_class <= R700) r600_pipe_state_mod_reg(&rctx->vgt, rctx->cb_color_control); r600_pipe_state_mod_reg(&rctx->vgt, - rctx->vs_shader->pa_cl_vs_out_cntl | - (rctx->rasterizer->clip_plane_enable & rctx->vs_shader->shader.clip_dist_write)); + rctx->vs_shader->current->pa_cl_vs_out_cntl | + (rctx->rasterizer->clip_plane_enable & rctx->vs_shader->current->shader.clip_dist_write)); r600_pipe_state_mod_reg(&rctx->vgt, rctx->pa_cl_clip_cntl | - (rctx->vs_shader->shader.clip_dist_write || - rctx->vs_shader->shader.vs_prohibit_ucps ? + (rctx->vs_shader->current->shader.clip_dist_write || + rctx->vs_shader->current->shader.vs_prohibit_ucps ? 0 : rctx->rasterizer->clip_plane_enable & 0x3F)); r600_context_pipe_state_set(rctx, &rctx->vgt);