radeonsi: add an si_set_rw_shader_buffer convenience function
[mesa.git] / src / gallium / drivers / radeonsi / si_state_binning.c
index 91e616907eafceda6a7a836fbfae1f0b0444f6d7..3516e561282ef3c8232cee63629b14a6aa756a0c 100644 (file)
@@ -74,9 +74,9 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
                if (!(cb_target_enabled_4bit & (0xf << (i * 4))))
                        continue;
 
-               struct r600_texture *rtex =
-                       (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
-               sum += rtex->surface.bpe;
+               struct si_texture *tex =
+                       (struct si_texture*)sctx->framebuffer.state.cbufs[i]->texture;
+               sum += tex->surface.bpe;
        }
 
        /* Multiply the sum by some function of the number of samples. */
@@ -190,13 +190,13 @@ static struct uvec2 si_get_depth_bin_size(struct si_context *sctx)
                return size;
        }
 
-       struct r600_texture *rtex =
-               (struct r600_texture*)sctx->framebuffer.state.zsbuf->texture;
+       struct si_texture *tex =
+               (struct si_texture*)sctx->framebuffer.state.zsbuf->texture;
        unsigned depth_coeff = dsa->depth_enabled ? 5 : 0;
-       unsigned stencil_coeff = rtex->surface.has_stencil &&
+       unsigned stencil_coeff = tex->surface.has_stencil &&
                                 dsa->stencil_enabled ? 1 : 0;
        unsigned sum = 4 * (depth_coeff + stencil_coeff) *
-                      rtex->buffer.b.b.nr_samples;
+                      tex->buffer.b.b.nr_samples;
 
        static const si_bin_size_subtable table[] = {
                {
@@ -310,13 +310,18 @@ static struct uvec2 si_get_depth_bin_size(struct si_context *sctx)
 
 static void si_emit_dpbb_disable(struct si_context *sctx)
 {
+       unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
        radeon_opt_set_context_reg(sctx, R_028C44_PA_SC_BINNER_CNTL_0,
                SI_TRACKED_PA_SC_BINNER_CNTL_0,
                S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
                S_028C44_DISABLE_START_OF_PRIM(1));
        radeon_opt_set_context_reg(sctx, R_028060_DB_DFSM_CONTROL,
                                   SI_TRACKED_DB_DFSM_CONTROL,
-                                  S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
+                                  S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
+                                  S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
+       if (initial_cdw != sctx->gfx_cs->current.cdw)
+               sctx->context_roll_counter++;
 }
 
 void si_emit_dpbb_state(struct si_context *sctx)
@@ -338,18 +343,14 @@ void si_emit_dpbb_state(struct si_context *sctx)
                           G_02880C_COVERAGE_TO_MASK_ENABLE(db_shader_control) ||
                           blend->alpha_to_coverage;
 
-       /* This is ported from Vulkan, but it doesn't make much sense to me.
-        * Maybe it's for RE-Z? But Vulkan doesn't use RE-Z. TODO: Clarify this.
-        */
-       bool ps_can_reject_z_trivially =
+       bool db_can_reject_z_trivially =
                !G_02880C_Z_EXPORT_ENABLE(db_shader_control) ||
-               G_02880C_CONSERVATIVE_Z_EXPORT(db_shader_control);
+               G_02880C_CONSERVATIVE_Z_EXPORT(db_shader_control) ||
+               G_02880C_DEPTH_BEFORE_SHADER(db_shader_control);
 
-       /* Disable binning if PS can kill trivially with DB writes.
-        * Ported from Vulkan. (heuristic?)
-        */
+       /* Disable DPBB when it's believed to be inefficient. */
        if (ps_can_kill &&
-           ps_can_reject_z_trivially &&
+           db_can_reject_z_trivially &&
            sctx->framebuffer.state.zsbuf &&
            dsa->db_can_write) {
                si_emit_dpbb_disable(sctx);
@@ -404,7 +405,9 @@ void si_emit_dpbb_state(struct si_context *sctx)
        switch (sctx->family) {
        case CHIP_VEGA10:
        case CHIP_VEGA12:
+       case CHIP_VEGA20:
        case CHIP_RAVEN:
+       case CHIP_RAVEN2:
                /* Tuned for Raven. Vega might need different values. */
                context_states_per_bin = 5;
                persistent_states_per_bin = 31;
@@ -421,6 +424,7 @@ void si_emit_dpbb_state(struct si_context *sctx)
        if (bin_size.y >= 32)
                bin_size_extend.y = util_logbase2(bin_size.y) - 5;
 
+       unsigned initial_cdw = sctx->gfx_cs->current.cdw;
        radeon_opt_set_context_reg(
                sctx, R_028C44_PA_SC_BINNER_CNTL_0,
                SI_TRACKED_PA_SC_BINNER_CNTL_0,
@@ -436,5 +440,8 @@ void si_emit_dpbb_state(struct si_context *sctx)
                S_028C44_OPTIMAL_BIN_SELECTION(1));
        radeon_opt_set_context_reg(sctx, R_028060_DB_DFSM_CONTROL,
                                   SI_TRACKED_DB_DFSM_CONTROL,
-                                  S_028060_PUNCHOUT_MODE(punchout_mode));
+                                  S_028060_PUNCHOUT_MODE(punchout_mode) |
+                                  S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
+       if (initial_cdw != sctx->gfx_cs->current.cdw)
+               sctx->context_roll_counter++;
 }