radeonsi: don't update dependent states if it has no effect (v2)
author Marek Olšák <marek.olsak@amd.com>
Mon, 5 Jun 2017 00:00:52 +0000 (02:00 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 8 Jun 2017 21:29:07 +0000 (23:29 +0200)
This and the previous clip_regs commit decrease IB sizes and the number of
si_update_shaders invocations as follows:

                 IB size   si_update_shaders calls
Borderlands 2      -10%            -27%
Deus Ex: MD         -5%            -11%
Talos Principle     -8%            -30%

v2: always dirty cb_render_state in set_framebuffer_state

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 3f471a3672e68f93510366a727fef63b7c68f802..c7bc7b0a9cac200758c807b6f06ec693d3d7d5de 100644 (file)
@@ -603,9 +603,27 @@ static void *si_create_blend_state(struct pipe_context *ctx,
 static void si_bind_blend_state(struct pipe_context *ctx, void *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
-       si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
-       si_mark_atom_dirty(sctx, &sctx->cb_render_state);
-       sctx->do_update_shaders = true;
+       struct si_state_blend *old_blend = sctx->queued.named.blend;
+       struct si_state_blend *blend = (struct si_state_blend *)state;
+
+       if (!state)
+               return;
+
+       if (!old_blend ||
+            old_blend->cb_target_mask != blend->cb_target_mask ||
+            old_blend->dual_src_blend != blend->dual_src_blend)
+               si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
+       si_pm4_bind_state(sctx, blend, state);
+
+       if (!old_blend ||
+           old_blend->cb_target_mask != blend->cb_target_mask ||
+           old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
+           old_blend->alpha_to_one != blend->alpha_to_one ||
+           old_blend->dual_src_blend != blend->dual_src_blend ||
+           old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
+           old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
+               sctx->do_update_shaders = true;
 }
 
 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
@@ -921,10 +939,27 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
        si_pm4_bind_state(sctx, rasterizer, rs);
        si_update_poly_offset_state(sctx);
 
-       si_mark_atom_dirty(sctx, &sctx->clip_regs);
+       if (!old_rs ||
+           old_rs->clip_plane_enable != rs->clip_plane_enable ||
+           old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
+               si_mark_atom_dirty(sctx, &sctx->clip_regs);
+
        sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
                rs->line_stipple_enable;
-       sctx->do_update_shaders = true;
+
+       if (!old_rs ||
+           old_rs->clip_plane_enable != rs->clip_plane_enable ||
+           old_rs->rasterizer_discard != rs->rasterizer_discard ||
+           old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
+           old_rs->flatshade != rs->flatshade ||
+           old_rs->two_side != rs->two_side ||
+           old_rs->multisample_enable != rs->multisample_enable ||
+           old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
+           old_rs->poly_smooth != rs->poly_smooth ||
+           old_rs->line_smooth != rs->line_smooth ||
+           old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
+           old_rs->force_persample_interp != rs->force_persample_interp)
+               sctx->do_update_shaders = true;
 }
 
 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
@@ -1062,6 +1097,7 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
 static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
 {
         struct si_context *sctx = (struct si_context *)ctx;
+       struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
         struct si_state_dsa *dsa = state;
 
         if (!state)
@@ -1074,7 +1110,9 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
                sctx->stencil_ref.dsa_part = dsa->stencil_ref;
                si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
        }
-       sctx->do_update_shaders = true;
+
+       if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
+               sctx->do_update_shaders = true;
 }
 
 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
@@ -3693,6 +3731,9 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
                        return NULL;
                }
 
+               if (elements[i].instance_divisor)
+                       v->uses_instance_divisors = true;
+
                if (!used[vbo_index]) {
                        v->first_vb_use_mask |= 1 << i;
                        used[vbo_index] = true;
@@ -3806,11 +3847,19 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
+       struct si_vertex_element *old = sctx->vertex_elements;
        struct si_vertex_element *v = (struct si_vertex_element*)state;
 
        sctx->vertex_elements = v;
        sctx->vertex_buffers_dirty = true;
-       sctx->do_update_shaders = true;
+
+       if (v &&
+           (!old ||
+            old->count != v->count ||
+            old->uses_instance_divisors != v->uses_instance_divisors ||
+            v->uses_instance_divisors || /* we don't check which divisors changed */
+            memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count)))
+               sctx->do_update_shaders = true;
 }
 
 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
index 275f830613c9687c51c7482e52fffa03fc0d720e..4da51be3eafa98974c892097e398d81434e9b559 100644 (file)
@@ -109,6 +109,7 @@ struct si_vertex_element
        uint32_t                        rsrc_word3[SI_MAX_ATTRIBS];
        uint32_t                        format_size[SI_MAX_ATTRIBS];
        struct pipe_vertex_element      elements[SI_MAX_ATTRIBS];
+       bool                            uses_instance_divisors;
 };
 
 union si_state {
index c21f855d7a2850ea29164bd4371991ff272e5e25..677a6de88c20fdfa8e3a00bfa865c7d56a909e66 100644 (file)
@@ -2314,18 +2314,25 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
 static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
+       struct si_shader_selector *old_sel = sctx->ps_shader.cso;
        struct si_shader_selector *sel = state;
 
        /* skip if supplied shader is one already in use */
-       if (sctx->ps_shader.cso == sel)
+       if (old_sel == sel)
                return;
 
        sctx->ps_shader.cso = sel;
        sctx->ps_shader.current = sel ? sel->first_variant : NULL;
        sctx->do_update_shaders = true;
-       if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess)
-               si_update_tess_uses_prim_id(sctx);
-       si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
+       if (sel) {
+               if (sctx->ia_multi_vgt_param_key.u.uses_tess)
+                       si_update_tess_uses_prim_id(sctx);
+
+               if (!old_sel ||
+                   old_sel->info.colors_written != sel->info.colors_written)
+                       si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+       }
        si_set_active_descriptors_for_shader(sctx, sel);
 }
 
@@ -3088,6 +3095,9 @@ bool si_update_shaders(struct si_context *sctx)
        struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
        struct si_shader *old_vs = si_get_vs_state(sctx);
        bool old_clip_disable = old_vs ? old_vs->key.opt.hw_vs.clip_disable : false;
+       struct si_shader *old_ps = sctx->ps_shader.current;
+       unsigned old_spi_shader_col_format =
+               old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
        int r;
 
        compiler_state.tm = sctx->tm;
@@ -3212,7 +3222,11 @@ bool si_update_shaders(struct si_context *sctx)
                        si_mark_atom_dirty(sctx, &sctx->spi_map);
                }
 
-               if (sctx->screen->b.rbplus_allowed && si_pm4_state_changed(sctx, ps))
+               if (sctx->screen->b.rbplus_allowed &&
+                   si_pm4_state_changed(sctx, ps) &&
+                   (!old_ps ||
+                    old_spi_shader_col_format !=
+                    sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format))
                        si_mark_atom_dirty(sctx, &sctx->cb_render_state);
 
                if (sctx->ps_db_shader_control != db_shader_control) {