From 3fe78594b1221358f4ba96072d952e33a7e54a76 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 10 Sep 2012 04:06:20 +0200 Subject: [PATCH] r600g: do fine-grained sampler state updates Update only those sampler states which are changed in a shader stage, instead of always updating all sampler states in the shader stage. That requires keeping a bitmask of those states which are enabled, and those states which are dirty at a given point (subset of enabled states). This is similar to how sampler views, constant buffers, and vertex buffers are handled. Reviewed-by: Jerome Glisse --- src/gallium/drivers/r600/evergreen_state.c | 23 +++--- src/gallium/drivers/r600/r600_blit.c | 4 +- src/gallium/drivers/r600/r600_hw_context.c | 9 ++- src/gallium/drivers/r600/r600_pipe.h | 14 +++- src/gallium/drivers/r600/r600_state.c | 35 +++++---- src/gallium/drivers/r600/r600_state_common.c | 76 +++++++++++++++----- 6 files changed, 110 insertions(+), 51 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index ee71747ef22..fda07905487 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2080,23 +2080,26 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx, unsigned border_index_reg) { struct radeon_winsys_cs *cs = rctx->cs; - unsigned i; + uint32_t dirty_mask = texinfo->states.dirty_mask; - for (i = 0; i < texinfo->n_samplers; i++) { + while (dirty_mask) { + struct r600_pipe_sampler_state *rstate; + unsigned i = u_bit_scan(&dirty_mask); + + rstate = texinfo->states.states[i]; + assert(rstate); - if (texinfo->samplers[i] == NULL) { - continue; - } r600_write_value(cs, PKT3(PKT3_SET_SAMPLER, 3, 0)); r600_write_value(cs, (resource_id_base + i) * 3); - r600_write_array(cs, 3, texinfo->samplers[i]->tex_sampler_words); + r600_write_array(cs, 3, rstate->tex_sampler_words); - if (texinfo->samplers[i]->border_color_use) { + if (rstate->border_color_use) { r600_write_config_reg_seq(cs, border_index_reg, 5); r600_write_value(cs, i); - r600_write_array(cs, 4, texinfo->samplers[i]->border_color); + r600_write_array(cs, 4, rstate->border_color); } } + texinfo->states.dirty_mask = 0; } static void evergreen_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) @@ -2149,8 +2152,8 @@ void evergreen_init_state_functions(struct r600_context *rctx) /* shader program */ r600_init_atom(rctx, &rctx->cs_shader_state.atom, id++, evergreen_emit_cs_shader, 0); /* sampler */ - r600_init_atom(rctx, &rctx->vs_samplers.atom_sampler, id++, evergreen_emit_vs_sampler_states, 0); - r600_init_atom(rctx, &rctx->ps_samplers.atom_sampler, id++, evergreen_emit_ps_sampler_states, 0); + r600_init_atom(rctx, &rctx->vs_samplers.states.atom, id++, evergreen_emit_vs_sampler_states, 0); + r600_init_atom(rctx, &rctx->ps_samplers.states.atom, id++, evergreen_emit_ps_sampler_states, 0); /* resources */ r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0); r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 072df143f13..584b7fc1806 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -79,8 +79,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op if (op & R600_SAVE_TEXTURES) { util_blitter_save_fragment_sampler_states( - rctx->blitter, rctx->ps_samplers.n_samplers, - (void**)rctx->ps_samplers.samplers); + rctx->blitter, util_last_bit(rctx->ps_samplers.states.enabled_mask), + (void**)rctx->ps_samplers.states.states); util_blitter_save_fragment_sampler_views( rctx->blitter, util_last_bit(rctx->ps_samplers.views.enabled_mask), diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 1db5f0d645c..020f626cde9 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -1043,9 +1043,12 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) r600_atom_dirty(ctx, &ctx->alphatest_state.atom); r600_atom_dirty(ctx, &ctx->cb_misc_state.atom); r600_atom_dirty(ctx, &ctx->db_misc_state.atom); - /* reemit sampler, will only matter if atom_sampler.num_dw != 0 */ - r600_atom_dirty(ctx, &ctx->vs_samplers.atom_sampler); - r600_atom_dirty(ctx, &ctx->ps_samplers.atom_sampler); + + ctx->vs_samplers.states.dirty_mask = ctx->vs_samplers.states.enabled_mask; + ctx->ps_samplers.states.dirty_mask = ctx->ps_samplers.states.enabled_mask; + r600_sampler_states_dirty(ctx, &ctx->vs_samplers.states); + r600_sampler_states_dirty(ctx, &ctx->ps_samplers.states); + if (ctx->chip_class <= R700) { r600_atom_dirty(ctx, &ctx->seamless_cube_map.atom); } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 5966cefb75a..8d1acd26504 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -261,11 +261,17 @@ struct r600_samplerview_state { uint32_t compressed_colortex_mask; }; +struct r600_sampler_states { + struct r600_atom atom; + struct r600_pipe_sampler_state *states[NUM_TEX_UNITS]; + uint32_t enabled_mask; + uint32_t dirty_mask; + uint32_t has_bordercolor_mask; /* which states contain the border color */ +}; + struct r600_textures_info { struct r600_samplerview_state views; - struct r600_atom atom_sampler; - struct r600_pipe_sampler_state *samplers[NUM_TEX_UNITS]; - unsigned n_samplers; + struct r600_sampler_states states; bool is_array_sampler[NUM_TEX_UNITS]; }; @@ -571,6 +577,8 @@ void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned void r600_vertex_buffers_dirty(struct r600_context *rctx); void r600_sampler_views_dirty(struct r600_context *rctx, struct r600_samplerview_state *state); +void r600_sampler_states_dirty(struct r600_context *rctx, + struct r600_sampler_states *state); void r600_set_max_scissor(struct r600_context *rctx); void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state); void r600_draw_rectangle(struct blitter_context *blitter, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index a802d935aa2..1e2e43e3081 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1932,42 +1932,47 @@ static void r600_emit_sampler_states(struct r600_context *rctx, unsigned border_color_reg) { struct radeon_winsys_cs *cs = rctx->cs; - unsigned i; + uint32_t dirty_mask = texinfo->states.dirty_mask; - for (i = 0; i < texinfo->n_samplers; i++) { + while (dirty_mask) { + struct r600_pipe_sampler_state *rstate; + struct r600_pipe_sampler_view *rview; + unsigned i = u_bit_scan(&dirty_mask); - if (texinfo->samplers[i] == NULL) { - continue; - } + rstate = texinfo->states.states[i]; + assert(rstate); + rview = texinfo->views.views[i]; /* TEX_ARRAY_OVERRIDE must be set for array textures to disable * filtering between layers. * Don't update TEX_ARRAY_OVERRIDE if we don't have the sampler view. */ - if (texinfo->views.views[i]) { - if (texinfo->views.views[i]->base.texture->target == PIPE_TEXTURE_1D_ARRAY || - texinfo->views.views[i]->base.texture->target == PIPE_TEXTURE_2D_ARRAY) { - texinfo->samplers[i]->tex_sampler_words[0] |= S_03C000_TEX_ARRAY_OVERRIDE(1); + if (rview) { + enum pipe_texture_target target = rview->base.texture->target; + if (target == PIPE_TEXTURE_1D_ARRAY || + target == PIPE_TEXTURE_2D_ARRAY) { + rstate->tex_sampler_words[0] |= S_03C000_TEX_ARRAY_OVERRIDE(1); texinfo->is_array_sampler[i] = true; } else { - texinfo->samplers[i]->tex_sampler_words[0] &= C_03C000_TEX_ARRAY_OVERRIDE; + rstate->tex_sampler_words[0] &= C_03C000_TEX_ARRAY_OVERRIDE; texinfo->is_array_sampler[i] = false; } } r600_write_value(cs, PKT3(PKT3_SET_SAMPLER, 3, 0)); r600_write_value(cs, (resource_id_base + i) * 3); - r600_write_array(cs, 3, texinfo->samplers[i]->tex_sampler_words); + r600_write_array(cs, 3, rstate->tex_sampler_words); - if (texinfo->samplers[i]->border_color_use) { + if (rstate->border_color_use) { unsigned offset; offset = border_color_reg; offset += i * 16; r600_write_config_reg_seq(cs, offset, 4); - r600_write_array(cs, 4, texinfo->samplers[i]->border_color); + r600_write_array(cs, 4, rstate->border_color); } } + texinfo->states.dirty_mask = 0; } static void r600_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) @@ -2025,8 +2030,8 @@ void r600_init_state_functions(struct r600_context *rctx) /* sampler must be emited before TA_CNTL_AUX otherwise DISABLE_CUBE_WRAP change * does not take effect (TA_CNTL_AUX emited by r600_emit_seamless_cube_map) */ - r600_init_atom(rctx, &rctx->vs_samplers.atom_sampler, id++, r600_emit_vs_sampler_states, 0); - r600_init_atom(rctx, &rctx->ps_samplers.atom_sampler, id++, r600_emit_ps_sampler_states, 0); + r600_init_atom(rctx, &rctx->vs_samplers.states.atom, id++, r600_emit_vs_sampler_states, 0); + r600_init_atom(rctx, &rctx->ps_samplers.states.atom, id++, r600_emit_ps_sampler_states, 0); /* resource */ r600_init_atom(rctx, &rctx->vs_samplers.views.atom, id++, r600_emit_vs_sampler_views, 0); r600_init_atom(rctx, &rctx->ps_samplers.views.atom, id++, r600_emit_ps_sampler_views, 0); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 13995e04ba5..87183169a2a 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -346,6 +346,20 @@ static void r600_sampler_view_destroy(struct pipe_context *ctx, FREE(resource); } +void r600_sampler_states_dirty(struct r600_context *rctx, + struct r600_sampler_states *state) +{ + if (state->dirty_mask) { + if (state->dirty_mask & state->has_bordercolor_mask) { + rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; + } + state->atom.num_dw = + util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 + + util_bitcount(state->dirty_mask & ~state->has_bordercolor_mask) * 5; + r600_atom_dirty(rctx, &state->atom); + } +} + static void r600_bind_sampler_states(struct pipe_context *pipe, unsigned shader, unsigned start, @@ -353,8 +367,13 @@ static void r600_bind_sampler_states(struct pipe_context *pipe, { struct r600_context *rctx = (struct r600_context *)pipe; struct r600_textures_info *dst; + struct r600_pipe_sampler_state **rstates = (struct r600_pipe_sampler_state**)states; int seamless_cube_map = -1; unsigned i; + /* This sets 1-bit for states with index >= count. */ + uint32_t disable_mask = ~((1ull << count) - 1); + /* These are the new states set by this function. */ + uint32_t new_mask = 0; assert(start == 0); /* XXX fix below */ @@ -370,33 +389,47 @@ static void r600_bind_sampler_states(struct pipe_context *pipe, return; } - memcpy(dst->samplers, states, sizeof(void*) * count); - dst->n_samplers = count; - dst->atom_sampler.num_dw = 0; - for (i = 0; i < count; i++) { - struct r600_pipe_sampler_state *sampler = states[i]; + struct r600_pipe_sampler_state *rstate = rstates[i]; - if (sampler == NULL) { + if (rstate == dst->states.states[i]) { continue; } - if (sampler->border_color_use) { - dst->atom_sampler.num_dw += 11; - rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; + + if (rstate) { + if (rstate->border_color_use) { + dst->states.has_bordercolor_mask |= 1 << i; + } else { + dst->states.has_bordercolor_mask &= ~(1 << i); + } + seamless_cube_map = rstate->seamless_cube_map; + + new_mask |= 1 << i; } else { - dst->atom_sampler.num_dw += 5; + disable_mask |= 1 << i; } - seamless_cube_map = sampler->seamless_cube_map; } - if (rctx->chip_class <= R700 && seamless_cube_map != -1 && seamless_cube_map != rctx->seamless_cube_map.enabled) { + + memcpy(dst->states.states, rstates, sizeof(void*) * count); + memset(dst->states.states + count, 0, sizeof(void*) * (NUM_TEX_UNITS - count)); + + dst->states.enabled_mask &= ~disable_mask; + dst->states.dirty_mask &= dst->states.enabled_mask; + dst->states.enabled_mask |= new_mask; + dst->states.dirty_mask |= new_mask; + dst->states.has_bordercolor_mask &= dst->states.enabled_mask; + + r600_sampler_states_dirty(rctx, &dst->states); + + /* Seamless cubemap state. */ + if (rctx->chip_class <= R700 && + seamless_cube_map != -1 && + seamless_cube_map != rctx->seamless_cube_map.enabled) { /* change in TA_CNTL_AUX need a pipeline flush */ rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; rctx->seamless_cube_map.enabled = seamless_cube_map; r600_atom_dirty(rctx, &rctx->seamless_cube_map.atom); } - if (dst->atom_sampler.num_dw) { - r600_atom_dirty(rctx, &dst->atom_sampler); - } } static void r600_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count, void **states) @@ -540,6 +573,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader, struct r600_context *rctx = (struct r600_context *) pipe; struct r600_textures_info *dst; struct r600_pipe_sampler_view **rviews = (struct r600_pipe_sampler_view **)views; + uint32_t dirty_sampler_states_mask = 0; unsigned i; /* This sets 1-bit for textures with index >= count. */ uint32_t disable_mask = ~((1ull << count) - 1); @@ -594,12 +628,13 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader, dst->views.compressed_colortex_mask &= ~(1 << i); } - /* Changing from array to non-arrays textures and vice - * versa requires updating TEX_ARRAY_OVERRIDE on R6xx-R7xx. */ + /* Changing from array to non-arrays textures and vice versa requires + * updating TEX_ARRAY_OVERRIDE in sampler states on R6xx-R7xx. */ if (rctx->chip_class <= R700 && + (dst->states.enabled_mask & (1 << i)) && (rviews[i]->base.texture->target == PIPE_TEXTURE_1D_ARRAY || rviews[i]->base.texture->target == PIPE_TEXTURE_2D_ARRAY) != dst->is_array_sampler[i]) { - r600_atom_dirty(rctx, &dst->atom_sampler); + dirty_sampler_states_mask |= 1 << i; } pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]); @@ -618,6 +653,11 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader, dst->views.compressed_colortex_mask &= dst->views.enabled_mask; r600_sampler_views_dirty(rctx, &dst->views); + + if (dirty_sampler_states_mask) { + dst->states.dirty_mask |= dirty_sampler_states_mask; + r600_sampler_states_dirty(rctx, &dst->states); + } } static void r600_set_vs_sampler_views(struct pipe_context *ctx, unsigned count, -- 2.30.2