- struct si_screen *sscreen = sctx->screen;
- struct si_state_blend *blend = sctx->queued.named.blend;
- struct si_state_dsa *dsa = sctx->queued.named.dsa;
- unsigned db_shader_control = sctx->ps_db_shader_control;
-
- assert(sctx->chip_class >= GFX9);
-
- if (!sscreen->dpbb_allowed || !blend || !dsa) {
- si_emit_dpbb_disable(sctx);
- return;
- }
-
- bool ps_can_kill = G_02880C_KILL_ENABLE(db_shader_control) ||
- G_02880C_MASK_EXPORT_ENABLE(db_shader_control) ||
- G_02880C_COVERAGE_TO_MASK_ENABLE(db_shader_control) ||
- blend->alpha_to_coverage;
-
- /* This is ported from Vulkan, but it doesn't make much sense to me.
- * Maybe it's for RE-Z? But Vulkan doesn't use RE-Z. TODO: Clarify this.
- */
- bool ps_can_reject_z_trivially =
- !G_02880C_Z_EXPORT_ENABLE(db_shader_control) ||
- G_02880C_CONSERVATIVE_Z_EXPORT(db_shader_control);
-
- /* Disable binning if PS can kill trivially with DB writes.
- * Ported from Vulkan. (heuristic?)
- */
- if (ps_can_kill &&
- ps_can_reject_z_trivially &&
- sctx->framebuffer.state.zsbuf &&
- dsa->db_can_write) {
- si_emit_dpbb_disable(sctx);
- return;
- }
-
- /* Compute the bin size. */
- /* TODO: We could also look at enabled pixel shader outputs. */
- unsigned cb_target_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit &
- blend->cb_target_enabled_4bit;
- struct uvec2 color_bin_size =
- si_get_color_bin_size(sctx, cb_target_enabled_4bit);
- struct uvec2 depth_bin_size = si_get_depth_bin_size(sctx);
-
- unsigned color_area = color_bin_size.x * color_bin_size.y;
- unsigned depth_area = depth_bin_size.x * depth_bin_size.y;
-
- struct uvec2 bin_size = color_area < depth_area ? color_bin_size
- : depth_bin_size;
-
- if (!bin_size.x || !bin_size.y) {
- si_emit_dpbb_disable(sctx);
- return;
- }
-
- /* Enable DFSM if it's preferred. */
- unsigned punchout_mode = V_028060_FORCE_OFF;
- bool disable_start_of_prim = true;
-
- if (sscreen->dfsm_allowed &&
- cb_target_enabled_4bit &&
- !G_02880C_KILL_ENABLE(db_shader_control) &&
- /* These two also imply that DFSM is disabled when PS writes to memory. */
- !G_02880C_EXEC_ON_HIER_FAIL(db_shader_control) &&
- !G_02880C_EXEC_ON_NOOP(db_shader_control) &&
- G_02880C_Z_ORDER(db_shader_control) == V_02880C_EARLY_Z_THEN_LATE_Z) {
- punchout_mode = V_028060_AUTO;
- disable_start_of_prim = (cb_target_enabled_4bit &
- blend->blend_enable_4bit) != 0;
- }
-
- /* Tunable parameters. Also test with DFSM enabled/disabled. */
- unsigned context_states_per_bin; /* allowed range: [0, 5] */
- unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
- unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
-
- switch (sctx->family) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_RAVEN:
- /* Tuned for Raven. Vega might need different values. */
- context_states_per_bin = 5;
- persistent_states_per_bin = 31;
- fpovs_per_batch = 63;
- break;
- default:
- assert(0);
- }
-
- /* Emit registers. */
- struct uvec2 bin_size_extend = {};
- if (bin_size.x >= 32)
- bin_size_extend.x = util_logbase2(bin_size.x) - 5;
- if (bin_size.y >= 32)
- bin_size_extend.y = util_logbase2(bin_size.y) - 5;
-
- struct radeon_winsys_cs *cs = sctx->gfx_cs;
- radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
- S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
- S_028C44_BIN_SIZE_X(bin_size.x == 16) |
- S_028C44_BIN_SIZE_Y(bin_size.y == 16) |
- S_028C44_BIN_SIZE_X_EXTEND(bin_size_extend.x) |
- S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend.y) |
- S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin) |
- S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin) |
- S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
- S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
- S_028C44_OPTIMAL_BIN_SELECTION(1));
- radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL,
- S_028060_PUNCHOUT_MODE(punchout_mode));
+ struct si_screen *sscreen = sctx->screen;
+ struct si_state_blend *blend = sctx->queued.named.blend;
+ struct si_state_dsa *dsa = sctx->queued.named.dsa;
+ unsigned db_shader_control = sctx->ps_db_shader_control;
+
+ assert(sctx->chip_class >= GFX9);
+
+ if (!sscreen->dpbb_allowed || sctx->dpbb_force_off) {
+ si_emit_dpbb_disable(sctx);
+ return;
+ }
+
+ bool ps_can_kill =
+ G_02880C_KILL_ENABLE(db_shader_control) || G_02880C_MASK_EXPORT_ENABLE(db_shader_control) ||
+ G_02880C_COVERAGE_TO_MASK_ENABLE(db_shader_control) || blend->alpha_to_coverage;
+
+ bool db_can_reject_z_trivially = !G_02880C_Z_EXPORT_ENABLE(db_shader_control) ||
+ G_02880C_CONSERVATIVE_Z_EXPORT(db_shader_control) ||
+ G_02880C_DEPTH_BEFORE_SHADER(db_shader_control);
+
+ /* Disable DPBB when it's believed to be inefficient. */
+ if (sscreen->info.num_render_backends > 4 && ps_can_kill && db_can_reject_z_trivially &&
+ sctx->framebuffer.state.zsbuf && dsa->db_can_write) {
+ si_emit_dpbb_disable(sctx);
+ return;
+ }
+
+ /* Compute the bin size. */
+ /* TODO: We could also look at enabled pixel shader outputs. */
+ unsigned cb_target_enabled_4bit =
+ sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_enabled_4bit;
+ struct uvec2 color_bin_size, depth_bin_size;
+
+ if (sctx->chip_class >= GFX10) {
+ gfx10_get_bin_sizes(sctx, cb_target_enabled_4bit, &color_bin_size, &depth_bin_size);
+ } else {
+ color_bin_size = si_get_color_bin_size(sctx, cb_target_enabled_4bit);
+ depth_bin_size = si_get_depth_bin_size(sctx);
+ }
+
+ unsigned color_area = color_bin_size.x * color_bin_size.y;
+ unsigned depth_area = depth_bin_size.x * depth_bin_size.y;
+
+ struct uvec2 bin_size = color_area < depth_area ? color_bin_size : depth_bin_size;
+
+ if (!bin_size.x || !bin_size.y) {
+ si_emit_dpbb_disable(sctx);
+ return;
+ }
+
+ /* Enable DFSM if it's preferred. */
+ unsigned punchout_mode = V_028060_FORCE_OFF;
+ bool disable_start_of_prim = true;
+ bool zs_eqaa_dfsm_bug =
+ sctx->chip_class == GFX9 && sctx->framebuffer.state.zsbuf &&
+ sctx->framebuffer.nr_samples != MAX2(1, sctx->framebuffer.state.zsbuf->texture->nr_samples);
+
+ if (sscreen->dfsm_allowed && !zs_eqaa_dfsm_bug && cb_target_enabled_4bit &&
+ !G_02880C_KILL_ENABLE(db_shader_control) &&
+ /* These two also imply that DFSM is disabled when PS writes to memory. */
+ !G_02880C_EXEC_ON_HIER_FAIL(db_shader_control) &&
+ !G_02880C_EXEC_ON_NOOP(db_shader_control) &&
+ G_02880C_Z_ORDER(db_shader_control) == V_02880C_EARLY_Z_THEN_LATE_Z) {
+ punchout_mode = V_028060_AUTO;
+ disable_start_of_prim = (cb_target_enabled_4bit & blend->blend_enable_4bit) != 0;
+ }
+
+ /* Tunable parameters. Also test with DFSM enabled/disabled. */
+ unsigned context_states_per_bin; /* allowed range: [1, 6] */
+ unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
+ unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
+
+ /* Tuned for Raven. Vega might need different values. */
+ if (sscreen->info.has_dedicated_vram) {
+ if (sscreen->info.num_render_backends > 4) {
+ context_states_per_bin = 1;
+ persistent_states_per_bin = 1;
+ } else {
+ context_states_per_bin = 3;
+ persistent_states_per_bin = 8;
+ }
+ } else {
+ /* This is a workaround for:
+ * https://bugs.freedesktop.org/show_bug.cgi?id=110214
+ * (an alternative is to insert manual BATCH_BREAK event when
+ * a context_roll is detected). */
+ context_states_per_bin = sctx->screen->info.has_gfx9_scissor_bug ? 1 : 6;
+ /* Using 32 here can cause GPU hangs on RAVEN1 */
+ persistent_states_per_bin = 16;
+ }
+ fpovs_per_batch = 63;
+
+ /* Emit registers. */
+ struct uvec2 bin_size_extend = {};
+ if (bin_size.x >= 32)
+ bin_size_extend.x = util_logbase2(bin_size.x) - 5;
+ if (bin_size.y >= 32)
+ bin_size_extend.y = util_logbase2(bin_size.y) - 5;
+
+ unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+ radeon_opt_set_context_reg(
+ sctx, R_028C44_PA_SC_BINNER_CNTL_0, SI_TRACKED_PA_SC_BINNER_CNTL_0,
+ S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | S_028C44_BIN_SIZE_X(bin_size.x == 16) |
+ S_028C44_BIN_SIZE_Y(bin_size.y == 16) | S_028C44_BIN_SIZE_X_EXTEND(bin_size_extend.x) |
+ S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend.y) |
+ S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin - 1) |
+ S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin - 1) |
+ S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
+ S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1) |
+ S_028C44_FLUSH_ON_BINNING_TRANSITION((sctx->family == CHIP_VEGA12 ||
+ sctx->family == CHIP_VEGA20 ||
+ sctx->family >= CHIP_RAVEN2) &&
+ sctx->last_binning_enabled != 1));
+
+ unsigned db_dfsm_control =
+ sctx->chip_class >= GFX10 ? R_028038_DB_DFSM_CONTROL : R_028060_DB_DFSM_CONTROL;
+ radeon_opt_set_context_reg(
+ sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL,
+ S_028060_PUNCHOUT_MODE(punchout_mode) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll = true;
+
+ sctx->last_binning_enabled = true;