r600g: do fine-grained sampler state updates
author Marek Olšák <maraeo@gmail.com>
Mon, 10 Sep 2012 02:06:20 +0000 (04:06 +0200)
committer Marek Olšák <maraeo@gmail.com>
Thu, 13 Sep 2012 18:18:44 +0000 (20:18 +0200)
Update only those sampler states which are changed in a shader stage,
instead of always updating all sampler states in the shader stage.
That requires keeping a bitmask of those states which are enabled, and those
states which are dirty at a given point (subset of enabled states).

This is similar to how sampler views, constant buffers, and vertex buffers
are handled.

Reviewed-by: Jerome Glisse <jglisse@redhat.com>
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c

index ee71747ef22203da8a94082e66ce78ab2302c4f4..fda0790548733f292b5c6406c0a0f0a368eb2e4a 100644 (file)
@@ -2080,23 +2080,26 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
                                unsigned border_index_reg)
 {
        struct radeon_winsys_cs *cs = rctx->cs;
-       unsigned i;
+       uint32_t dirty_mask = texinfo->states.dirty_mask;
 
-       for (i = 0; i < texinfo->n_samplers; i++) {
+       while (dirty_mask) {
+               struct r600_pipe_sampler_state *rstate;
+               unsigned i = u_bit_scan(&dirty_mask);
+
+               rstate = texinfo->states.states[i];
+               assert(rstate);
 
-               if (texinfo->samplers[i] == NULL) {
-                       continue;
-               }
                r600_write_value(cs, PKT3(PKT3_SET_SAMPLER, 3, 0));
                r600_write_value(cs, (resource_id_base + i) * 3);
-               r600_write_array(cs, 3, texinfo->samplers[i]->tex_sampler_words);
+               r600_write_array(cs, 3, rstate->tex_sampler_words);
 
-               if (texinfo->samplers[i]->border_color_use) {
+               if (rstate->border_color_use) {
                        r600_write_config_reg_seq(cs, border_index_reg, 5);
                        r600_write_value(cs, i);
-                       r600_write_array(cs, 4, texinfo->samplers[i]->border_color);
+                       r600_write_array(cs, 4, rstate->border_color);
                }
        }
+       texinfo->states.dirty_mask = 0;
 }
 
 static void evergreen_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
@@ -2149,8 +2152,8 @@ void evergreen_init_state_functions(struct r600_context *rctx)
        /* shader program */
        r600_init_atom(rctx, &rctx->cs_shader_state.atom, id++, evergreen_emit_cs_shader, 0);
        /* sampler */
-       r600_init_atom(rctx, &rctx->vs_samplers.atom_sampler, id++, evergreen_emit_vs_sampler_states, 0);
-       r600_init_atom(rctx, &rctx->ps_samplers.atom_sampler, id++, evergreen_emit_ps_sampler_states, 0);
+       r600_init_atom(rctx, &rctx->vs_samplers.states.atom, id++, evergreen_emit_vs_sampler_states, 0);
+       r600_init_atom(rctx, &rctx->ps_samplers.states.atom, id++, evergreen_emit_ps_sampler_states, 0);
        /* resources */
        r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0);
        r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0);
index 072df143f1328a8472de27af60925acbf3da5260..584b7fc1806f12da46d8e0e7b4ca7b009c792832 100644 (file)
@@ -79,8 +79,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
 
        if (op & R600_SAVE_TEXTURES) {
                util_blitter_save_fragment_sampler_states(
-                       rctx->blitter, rctx->ps_samplers.n_samplers,
-                       (void**)rctx->ps_samplers.samplers);
+                       rctx->blitter, util_last_bit(rctx->ps_samplers.states.enabled_mask),
+                       (void**)rctx->ps_samplers.states.states);
 
                util_blitter_save_fragment_sampler_views(
                        rctx->blitter, util_last_bit(rctx->ps_samplers.views.enabled_mask),
index 1db5f0d645cdfa96663b684d3f43498646b9d53b..020f626cde950883271d06a2a09f7b381e96e36b 100644 (file)
@@ -1043,9 +1043,12 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
        r600_atom_dirty(ctx, &ctx->alphatest_state.atom);
        r600_atom_dirty(ctx, &ctx->cb_misc_state.atom);
        r600_atom_dirty(ctx, &ctx->db_misc_state.atom);
-       /* reemit sampler, will only matter if atom_sampler.num_dw != 0 */
-       r600_atom_dirty(ctx, &ctx->vs_samplers.atom_sampler);
-       r600_atom_dirty(ctx, &ctx->ps_samplers.atom_sampler);
+
+       ctx->vs_samplers.states.dirty_mask = ctx->vs_samplers.states.enabled_mask;
+       ctx->ps_samplers.states.dirty_mask = ctx->ps_samplers.states.enabled_mask;
+       r600_sampler_states_dirty(ctx, &ctx->vs_samplers.states);
+       r600_sampler_states_dirty(ctx, &ctx->ps_samplers.states);
+
        if (ctx->chip_class <= R700) {
                r600_atom_dirty(ctx, &ctx->seamless_cube_map.atom);
        }
index 5966cefb75a0c495d85433a7a5f2a845e3a78da3..8d1acd265045bcab69d6e2698967e83465e9b130 100644 (file)
@@ -261,11 +261,17 @@ struct r600_samplerview_state {
        uint32_t                        compressed_colortex_mask;
 };
 
+struct r600_sampler_states { /* per-shader-stage sampler state bookkeeping */
+       struct r600_atom                atom; /* emit atom; num_dw is recomputed by r600_sampler_states_dirty() */
+       struct r600_pipe_sampler_state  *states[NUM_TEX_UNITS]; /* bound sampler state per slot, NULL if unbound */
+       uint32_t                        enabled_mask; /* bit i set: slot i has a sampler state bound */
+       uint32_t                        dirty_mask; /* subset of enabled_mask awaiting emission; zeroed by the emit functions */
+       uint32_t                        has_bordercolor_mask; /* which enabled states contain the border color (border_color_use set) */
+};
+
 struct r600_textures_info {
        struct r600_samplerview_state   views;
-       struct r600_atom                atom_sampler;
-       struct r600_pipe_sampler_state  *samplers[NUM_TEX_UNITS];
-       unsigned                        n_samplers;
+       struct r600_sampler_states      states;
        bool                            is_array_sampler[NUM_TEX_UNITS];
 };
 
@@ -571,6 +577,8 @@ void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned
 void r600_vertex_buffers_dirty(struct r600_context *rctx);
 void r600_sampler_views_dirty(struct r600_context *rctx,
                              struct r600_samplerview_state *state);
+void r600_sampler_states_dirty(struct r600_context *rctx,
+                              struct r600_sampler_states *state);
 void r600_set_max_scissor(struct r600_context *rctx);
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
 void r600_draw_rectangle(struct blitter_context *blitter,
index a802d935aa21e3e070f4a191d01e877d0eb2496e..1e2e43e3081a29ac7026531df1c6c4d2b5368839 100644 (file)
@@ -1932,42 +1932,47 @@ static void r600_emit_sampler_states(struct r600_context *rctx,
                                unsigned border_color_reg)
 {
        struct radeon_winsys_cs *cs = rctx->cs;
-       unsigned i;
+       uint32_t dirty_mask = texinfo->states.dirty_mask;
 
-       for (i = 0; i < texinfo->n_samplers; i++) {
+       while (dirty_mask) {
+               struct r600_pipe_sampler_state *rstate;
+               struct r600_pipe_sampler_view *rview;
+               unsigned i = u_bit_scan(&dirty_mask);
 
-               if (texinfo->samplers[i] == NULL) {
-                       continue;
-               }
+               rstate = texinfo->states.states[i];
+               assert(rstate);
+               rview = texinfo->views.views[i];
 
                /* TEX_ARRAY_OVERRIDE must be set for array textures to disable
                 * filtering between layers.
                 * Don't update TEX_ARRAY_OVERRIDE if we don't have the sampler view.
                 */
-               if (texinfo->views.views[i]) {
-                       if (texinfo->views.views[i]->base.texture->target == PIPE_TEXTURE_1D_ARRAY ||
-                           texinfo->views.views[i]->base.texture->target == PIPE_TEXTURE_2D_ARRAY) {
-                               texinfo->samplers[i]->tex_sampler_words[0] |= S_03C000_TEX_ARRAY_OVERRIDE(1);
+               if (rview) {
+                       enum pipe_texture_target target = rview->base.texture->target;
+                       if (target == PIPE_TEXTURE_1D_ARRAY ||
+                           target == PIPE_TEXTURE_2D_ARRAY) {
+                               rstate->tex_sampler_words[0] |= S_03C000_TEX_ARRAY_OVERRIDE(1);
                                texinfo->is_array_sampler[i] = true;
                        } else {
-                               texinfo->samplers[i]->tex_sampler_words[0] &= C_03C000_TEX_ARRAY_OVERRIDE;
+                               rstate->tex_sampler_words[0] &= C_03C000_TEX_ARRAY_OVERRIDE;
                                texinfo->is_array_sampler[i] = false;
                        }
                }
 
                r600_write_value(cs, PKT3(PKT3_SET_SAMPLER, 3, 0));
                r600_write_value(cs, (resource_id_base + i) * 3);
-               r600_write_array(cs, 3, texinfo->samplers[i]->tex_sampler_words);
+               r600_write_array(cs, 3, rstate->tex_sampler_words);
 
-               if (texinfo->samplers[i]->border_color_use) {
+               if (rstate->border_color_use) {
                        unsigned offset;
 
                        offset = border_color_reg;
                        offset += i * 16;
                        r600_write_config_reg_seq(cs, offset, 4);
-                       r600_write_array(cs, 4, texinfo->samplers[i]->border_color);
+                       r600_write_array(cs, 4, rstate->border_color);
                }
        }
+       texinfo->states.dirty_mask = 0;
 }
 
 static void r600_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
@@ -2025,8 +2030,8 @@ void r600_init_state_functions(struct r600_context *rctx)
        /* sampler must be emited before TA_CNTL_AUX otherwise DISABLE_CUBE_WRAP change
         * does not take effect (TA_CNTL_AUX emited by r600_emit_seamless_cube_map)
         */
-       r600_init_atom(rctx, &rctx->vs_samplers.atom_sampler, id++, r600_emit_vs_sampler_states, 0);
-       r600_init_atom(rctx, &rctx->ps_samplers.atom_sampler, id++, r600_emit_ps_sampler_states, 0);
+       r600_init_atom(rctx, &rctx->vs_samplers.states.atom, id++, r600_emit_vs_sampler_states, 0);
+       r600_init_atom(rctx, &rctx->ps_samplers.states.atom, id++, r600_emit_ps_sampler_states, 0);
        /* resource */
        r600_init_atom(rctx, &rctx->vs_samplers.views.atom, id++, r600_emit_vs_sampler_views, 0);
        r600_init_atom(rctx, &rctx->ps_samplers.views.atom, id++, r600_emit_ps_sampler_views, 0);
index 13995e04ba561170b40036d1cb282ba51b35cc71..87183169a2a371eec461d3f4472473dd150a32ff 100644 (file)
@@ -346,6 +346,20 @@ static void r600_sampler_view_destroy(struct pipe_context *ctx,
        FREE(resource);
 }
 
+void r600_sampler_states_dirty(struct r600_context *rctx,
+                              struct r600_sampler_states *state)
+{ /* Size the sampler-state atom from state->dirty_mask and mark it dirty. */
+       if (state->dirty_mask) { /* nothing dirty: leave the atom and context flags untouched */
+               if (state->dirty_mask & state->has_bordercolor_mask) { /* at least one dirty state has a border color */
+                       rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
+               }
+               state->atom.num_dw = /* dwords to reserve: 11 per dirty state with a border color, 5 per one without */
+                       util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 +
+                       util_bitcount(state->dirty_mask & ~state->has_bordercolor_mask) * 5; /* 5 = SET_SAMPLER header + offset + 3 sampler words */
+               r600_atom_dirty(rctx, &state->atom);
+       }
+}
+
 static void r600_bind_sampler_states(struct pipe_context *pipe,
                                unsigned shader,
                               unsigned start,
@@ -353,8 +367,13 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
 {
        struct r600_context *rctx = (struct r600_context *)pipe;
        struct r600_textures_info *dst;
+       struct r600_pipe_sampler_state **rstates = (struct r600_pipe_sampler_state**)states;
        int seamless_cube_map = -1;
        unsigned i;
+       /* This sets 1-bit for states with index >= count. */
+       uint32_t disable_mask = ~((1ull << count) - 1);
+       /* These are the new states set by this function. */
+       uint32_t new_mask = 0;
 
        assert(start == 0); /* XXX fix below */
 
@@ -370,33 +389,47 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
                return;
        }
 
-       memcpy(dst->samplers, states, sizeof(void*) * count);
-       dst->n_samplers = count;
-       dst->atom_sampler.num_dw = 0;
-
        for (i = 0; i < count; i++) {
-               struct r600_pipe_sampler_state *sampler = states[i];
+               struct r600_pipe_sampler_state *rstate = rstates[i];
 
-               if (sampler == NULL) {
+               if (rstate == dst->states.states[i]) {
                        continue;
                }
-               if (sampler->border_color_use) {
-                       dst->atom_sampler.num_dw += 11;
-                       rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
+
+               if (rstate) {
+                       if (rstate->border_color_use) {
+                               dst->states.has_bordercolor_mask |= 1 << i;
+                       } else {
+                               dst->states.has_bordercolor_mask &= ~(1 << i);
+                       }
+                       seamless_cube_map = rstate->seamless_cube_map;
+
+                       new_mask |= 1 << i;
                } else {
-                       dst->atom_sampler.num_dw += 5;
+                       disable_mask |= 1 << i;
                }
-               seamless_cube_map = sampler->seamless_cube_map;
        }
-       if (rctx->chip_class <= R700 && seamless_cube_map != -1 && seamless_cube_map != rctx->seamless_cube_map.enabled) {
+
+       memcpy(dst->states.states, rstates, sizeof(void*) * count);
+       memset(dst->states.states + count, 0, sizeof(void*) * (NUM_TEX_UNITS - count));
+
+       dst->states.enabled_mask &= ~disable_mask;
+       dst->states.dirty_mask &= dst->states.enabled_mask;
+       dst->states.enabled_mask |= new_mask;
+       dst->states.dirty_mask |= new_mask;
+       dst->states.has_bordercolor_mask &= dst->states.enabled_mask;
+
+       r600_sampler_states_dirty(rctx, &dst->states);
+
+       /* Seamless cubemap state. */
+       if (rctx->chip_class <= R700 &&
+           seamless_cube_map != -1 &&
+           seamless_cube_map != rctx->seamless_cube_map.enabled) {
                /* change in TA_CNTL_AUX need a pipeline flush */
                rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
                rctx->seamless_cube_map.enabled = seamless_cube_map;
                r600_atom_dirty(rctx, &rctx->seamless_cube_map.atom);
        }
-       if (dst->atom_sampler.num_dw) {
-               r600_atom_dirty(rctx, &dst->atom_sampler);
-       }
 }
 
 static void r600_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
@@ -540,6 +573,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
        struct r600_context *rctx = (struct r600_context *) pipe;
        struct r600_textures_info *dst;
        struct r600_pipe_sampler_view **rviews = (struct r600_pipe_sampler_view **)views;
+       uint32_t dirty_sampler_states_mask = 0;
        unsigned i;
        /* This sets 1-bit for textures with index >= count. */
        uint32_t disable_mask = ~((1ull << count) - 1);
@@ -594,12 +628,13 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
                                dst->views.compressed_colortex_mask &= ~(1 << i);
                        }
 
-                       /* Changing from array to non-arrays textures and vice
-                        * versa requires updating TEX_ARRAY_OVERRIDE on R6xx-R7xx. */
+                       /* Changing from array to non-arrays textures and vice versa requires
+                        * updating TEX_ARRAY_OVERRIDE in sampler states on R6xx-R7xx. */
                        if (rctx->chip_class <= R700 &&
+                           (dst->states.enabled_mask & (1 << i)) &&
                            (rviews[i]->base.texture->target == PIPE_TEXTURE_1D_ARRAY ||
                             rviews[i]->base.texture->target == PIPE_TEXTURE_2D_ARRAY) != dst->is_array_sampler[i]) {
-                               r600_atom_dirty(rctx, &dst->atom_sampler);
+                               dirty_sampler_states_mask |= 1 << i;
                        }
 
                        pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]);
@@ -618,6 +653,11 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
        dst->views.compressed_colortex_mask &= dst->views.enabled_mask;
 
        r600_sampler_views_dirty(rctx, &dst->views);
+
+       if (dirty_sampler_states_mask) {
+               dst->states.dirty_mask |= dirty_sampler_states_mask;
+               r600_sampler_states_dirty(rctx, &dst->states);
+       }
 }
 
 static void r600_set_vs_sampler_views(struct pipe_context *ctx, unsigned count,