radeonsi: use re-Z
author Marek Olšák <marek.olsak@amd.com>
Sun, 21 Feb 2016 23:40:04 +0000 (00:40 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 29 Feb 2016 23:18:19 +0000 (00:18 +0100)
This can increase performance for shaders that kill pixels (kill, alpha-test,
alpha-to-coverage).

v2: add comments

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index ff5c24d89187bd1389a65c335f7042461797c8fa..637d26488df2643a7e8710a89845e78150eed91b 100644 (file)
@@ -365,6 +365,7 @@ struct si_shader {
        struct r600_resource            *scratch_bo;
        union si_shader_key             key;
        bool                            is_binary_shared;
+       unsigned                        z_order;
 
        /* The following data is all that's needed for binary shaders. */
        struct radeon_shader_binary     binary;
index 2dfdbeb8d8f895172cbd18110ad8dc947420ea8b..b23b17ad77b1e571bf04011b1ae0a49275042c34 100644 (file)
@@ -1339,10 +1339,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
                            sctx->ps_db_shader_control;
 
        /* Bug workaround for smoothing (overrasterization) on SI. */
-       if (sctx->b.chip_class == SI && sctx->smoothing_enabled)
+       if (sctx->b.chip_class == SI && sctx->smoothing_enabled) {
+               db_shader_control &= C_02880C_Z_ORDER;
                db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
-       else
-               db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
+       }
 
        /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
        if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
index a6753a7a528b12b678ca3402d7988b98e9a7f429..321b87d80a691b7affc94c8c090c40c5f4405dbd 100644 (file)
@@ -789,6 +789,17 @@ static void si_shader_ps(struct si_shader *shader)
                       S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
                       S_00B02C_USER_SGPR(num_user_sgprs) |
                       S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
+
+       /* Prefer RE_Z if the shader is complex enough. The requirement is either:
+        * - the shader uses at least 2 VMEM instructions, or
+        * - the code size exceeds 50 2-dword instructions or 100 1-dword
+        *   instructions (i.e. more than 400 bytes).
+        */
+       if (info->num_memory_instructions >= 2 ||
+           shader->binary.code_size > 100*4)
+               shader->z_order = V_02880C_EARLY_Z_THEN_RE_Z;
+       else
+               shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
 }
 
 static void si_shader_init_pm4_state(struct si_shader *shader)
@@ -1985,15 +1996,18 @@ bool si_update_shaders(struct si_context *sctx)
        si_update_vgt_shader_config(sctx);
 
        if (sctx->ps_shader.cso) {
-               unsigned db_shader_control =
-                       sctx->ps_shader.cso->db_shader_control |
-                       S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);
+               unsigned db_shader_control;
 
                r = si_shader_select(ctx, &sctx->ps_shader);
                if (r)
                        return false;
                si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
 
+               db_shader_control =
+                       sctx->ps_shader.cso->db_shader_control |
+                       S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS) |
+                       S_02880C_Z_ORDER(sctx->ps_shader.current->z_order);
+
                if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
                    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
                    sctx->flatshade != rs->flatshade) {