2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 /* This file handles register programming of primitive binning. */
29 #include "radeon/r600_cs.h"
/* One entry of a bin-size lookup table.
 *
 * NOTE(review): the member list is not visible in this excerpt. Code in this
 * file reads the members `start`, `bin_size_x` and `bin_size_y` from entries
 * of this type; their declared types must be confirmed against the full file.
 */
35 struct si_bin_size_map
{
/* A subtable is a [3][9] array of si_bin_size_map entries.
 *
 * si_find_bin_size() receives an array of these and indexes it as
 * table[log_num_rb_per_se][log_num_se][entry]: the array element picks the
 * subtable, the dimension of size 3 is indexed by log_num_se, and the
 * dimension of size 9 holds the entries that are scanned.
 */
41 typedef struct si_bin_size_map si_bin_size_subtable
[3][9];
43 /* Find the bin size where sum is >= table[i].start and < table[i + 1].start. */
/* Look up the bin size for `sum` in the subtable that matches the chip.
 *
 * sscreen: screen whose b.info.num_render_backends and b.info.max_se select
 *          the subtable.
 * table:   array of subtables, indexed [log_num_rb_per_se][log_num_se].
 *
 * The selected row is scanned from entry 0 until an entry whose `start` is
 * UINT_MAX; the entry at the final index `i` supplies bin_size_x/bin_size_y.
 *
 * NOTE(review): the embedded original line numbers jump (45 -> 48, 51 -> 54,
 * 59 -> 63), so several lines are missing from this excerpt: the parameter
 * that declares `sum`, the declaration of `i`, the statement guarded by the
 * range test, the end of the loop, and the return statement.
 */
44 static struct uvec2
si_find_bin_size(struct si_screen
*sscreen
,
45 const si_bin_size_subtable table
[],
/* Render backends per shader engine, passed through util_logbase2_ceil()
 * (presumably ceil(log2(x)) - confirm against its definition).
 * NOTE(review): divides by max_se; assumes max_se is never 0.
 */
48 unsigned log_num_rb_per_se
=
49 util_logbase2_ceil(sscreen
->b
.info
.num_render_backends
/
50 sscreen
->b
.info
.max_se
);
/* NOTE(review): this value indexes a dimension of size 3 (see the
 * si_bin_size_subtable typedef), so it must stay within 0..2.
 */
51 unsigned log_num_se
= util_logbase2_ceil(sscreen
->b
.info
.max_se
);
54 /* Get the chip-specific subtable. */
55 const struct si_bin_size_map
*subtable
=
56 &table
[log_num_rb_per_se
][log_num_se
][0];
/* The loop condition guarantees subtable[i] is not the UINT_MAX sentinel, so
 * reading subtable[i + 1] below reaches at most the sentinel entry itself.
 */
58 for (i
= 0; subtable
[i
].start
!= UINT_MAX
; i
++) {
/* Half-open range test: start[i] <= sum < start[i + 1]. */
59 if (sum
>= subtable
[i
].start
&& sum
< subtable
[i
+ 1].start
)
/* Build the result from whichever entry index the scan ended on. */
63 struct uvec2 size
= {subtable
[i
].bin_size_x
, subtable
[i
].bin_size_y
};
/* Choose the bin size implied by the bound color buffers.
 *
 * sctx:                   context whose framebuffer state is inspected.
 * cb_target_enabled_4bit: mask with one 4-bit group per color buffer; buffer
 *                         i is tested through bits [4*i, 4*i + 3].
 *
 * Accumulates surface.bpe of the color buffers into `sum`, adjusts it when
 * the framebuffer is multisampled, and returns
 * si_find_bin_size(sctx->screen, table, sum).
 *
 * NOTE(review): many original lines are missing from this excerpt: the
 * declaration of `sum`, the statement guarded by the enable-mask test, the
 * multisample scaling statements and every table initializer row.
 */
67 static struct uvec2
si_get_color_bin_size(struct si_context
*sctx
,
68 unsigned cb_target_enabled_4bit
)
70 unsigned nr_samples
= sctx
->framebuffer
.nr_samples
;
73 /* Compute the sum of all Bpp. */
74 for (unsigned i
= 0; i
< sctx
->framebuffer
.state
.nr_cbufs
; i
++) {
/* True when all four mask bits of color buffer i are clear. The statement
 * this guards is not visible here (presumably it skips the buffer).
 * NOTE(review): 0xf has type int, so if nr_cbufs can reach 8 the shift by 28
 * produces a value not representable in int; 0xfu would avoid that.
 */
75 if (!(cb_target_enabled_4bit
& (0xf << (i
* 4))))
/* The bound surface is viewed as an r600_texture to reach surface.bpe. */
78 struct r600_texture
*rtex
=
79 (struct r600_texture
*)sctx
->framebuffer
.state
.cbufs
[i
]->texture
;
80 sum
+= rtex
->surface
.bpe
;
83 /* Multiply the sum by some function of the number of samples. */
/* Two cases are distinguished for multisampled framebuffers, depending on
 * whether ps_iter_samples is at least 2. The factor applied in each case is
 * not visible in this excerpt.
 */
84 if (nr_samples
>= 2) {
85 if (sctx
->ps_iter_samples
>= 2)
/* Lookup table passed to si_find_bin_size(). Only the section comments
 * survive in this excerpt: three groups, each with one/two/four shader
 * engine sections.
 */
91 static const si_bin_size_subtable table
[] = {
95 /* One shader engine */
104 /* Two shader engines */
113 /* Four shader engines */
124 /* One shader engine */
133 /* Two shader engines */
142 /* Four shader engines */
155 /* One shader engine */
165 /* Two shader engines */
176 /* Four shader engines */
189 return si_find_bin_size(sctx
->screen
, table
, sum
);
/* Choose the bin size implied by the depth/stencil buffer.
 *
 * sctx: context providing the queued DSA state and the framebuffer.
 *
 * When no zsbuf is bound, or the DSA state has both depth and stencil
 * disabled, a 512x512 size is prepared (the "max size" per the comment
 * below). Otherwise a cost `sum` is computed and the result is
 * si_find_bin_size(sctx->screen, table, sum).
 *
 * NOTE(review): the return of the early-out size and all table initializer
 * rows are missing from this excerpt.
 * NOTE(review): `dsa` is dereferenced without a NULL check in this function.
 */
192 static struct uvec2
si_get_depth_bin_size(struct si_context
*sctx
)
194 struct si_state_dsa
*dsa
= sctx
->queued
.named
.dsa
;
196 if (!sctx
->framebuffer
.state
.zsbuf
||
197 (!dsa
->depth_enabled
&& !dsa
->stencil_enabled
)) {
198 /* Return the max size. */
199 struct uvec2 size
= {512, 512};
203 struct r600_texture
*rtex
=
204 (struct r600_texture
*)sctx
->framebuffer
.state
.zsbuf
->texture
;
/* Depth contributes a weight of 5 when depth testing is enabled. */
205 unsigned depth_coeff
= dsa
->depth_enabled
? 5 : 0;
/* `&&` binds tighter than `?:`, so this is
 * (has_stencil && stencil_enabled) ? 1 : 0.
 */
206 unsigned stencil_coeff
= rtex
->surface
.has_stencil
&&
207 dsa
->stencil_enabled
? 1 : 0;
/* sum = 4 * (depth weight + stencil weight) * sample count. */
208 unsigned sum
= 4 * (depth_coeff
+ stencil_coeff
) *
209 sctx
->framebuffer
.nr_samples
;
/* Lookup table passed to si_find_bin_size(); only some section comments
 * survive in this excerpt.
 */
211 static const si_bin_size_subtable table
[] = {
225 // Two shader engines
236 // Four shader engines
261 // Two shader engines
273 // Four shader engines
299 // Two shader engines
311 // Four shader engines
324 return si_find_bin_size(sctx
->screen
, table
, sum
);
/* Emit the register state that turns primitive binning off.
 *
 * Writes two context registers into the gfx command stream of `sctx`:
 *  - PA_SC_BINNER_CNTL_0 with BINNING_MODE = DISABLE_BINNING_USE_LEGACY_SC
 *    and DISABLE_START_OF_PRIM = 1;
 *  - DB_DFSM_CONTROL with PUNCHOUT_MODE = FORCE_OFF.
 */
327 static void si_emit_dpbb_disable(struct si_context
*sctx
)
329 struct radeon_winsys_cs
*cs
= sctx
->b
.gfx
.cs
;
331 radeon_set_context_reg(cs
, R_028C44_PA_SC_BINNER_CNTL_0
,
332 S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC
) |
333 S_028C44_DISABLE_START_OF_PRIM(1));
334 radeon_set_context_reg(cs
, R_028060_DB_DFSM_CONTROL
,
335 S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF
));
/* Emit primitive-binning (PA_SC_BINNER_CNTL_0) and DFSM (DB_DFSM_CONTROL)
 * register state for the current context state.
 *
 * sctx:  context; asserted to have chip_class >= GFX9.
 * state: not referenced in the lines visible here.
 *
 * Visible flow:
 *  1. Call si_emit_dpbb_disable() when the screen disallows DPBB or when no
 *     blend or DSA state is queued.
 *  2. Derive ps_can_kill / ps_can_reject_z_trivially from db_shader_control
 *     and call si_emit_dpbb_disable() for a combination of those with zsbuf.
 *  3. Pick the bin size from the color and depth candidates; call
 *     si_emit_dpbb_disable() when either dimension is 0.
 *  4. Decide the DFSM punchout mode and DISABLE_START_OF_PRIM.
 *  5. Select tunables by chip family and write both registers.
 *
 * NOTE(review): this excerpt omits many original lines (the returns after
 * each si_emit_dpbb_disable() call, comment terminators, part of the
 * disable condition, the second operand of the bin-size selection, `case`
 * labels and closing braces). Consult the full file before changing logic.
 */
338 void si_emit_dpbb_state(struct si_context
*sctx
, struct r600_atom
*state
)
340 struct si_screen
*sscreen
= sctx
->screen
;
341 struct si_state_blend
*blend
= sctx
->queued
.named
.blend
;
342 struct si_state_dsa
*dsa
= sctx
->queued
.named
.dsa
;
343 unsigned db_shader_control
= sctx
->ps_db_shader_control
;
345 assert(sctx
->b
.chip_class
>= GFX9
);
347 if (!sscreen
->dpbb_allowed
|| !blend
|| !dsa
) {
348 si_emit_dpbb_disable(sctx
);
/* True when any of KILL_ENABLE, MASK_EXPORT_ENABLE or
 * COVERAGE_TO_MASK_ENABLE is set in db_shader_control, or when the blend
 * state uses alpha-to-coverage.
 */
352 bool ps_can_kill
= G_02880C_KILL_ENABLE(db_shader_control
) ||
353 G_02880C_MASK_EXPORT_ENABLE(db_shader_control
) ||
354 G_02880C_COVERAGE_TO_MASK_ENABLE(db_shader_control
) ||
355 blend
->alpha_to_coverage
;
357 /* This is ported from Vulkan, but it doesn't make much sense to me.
358 * Maybe it's for RE-Z? But Vulkan doesn't use RE-Z. TODO: Clarify this.
360 bool ps_can_reject_z_trivially
=
361 !G_02880C_Z_EXPORT_ENABLE(db_shader_control
) ||
362 G_02880C_CONSERVATIVE_Z_EXPORT(db_shader_control
);
364 /* Disable binning if PS can kill trivially with DB writes.
365 * Ported from Vulkan. (heuristic?)
368 ps_can_reject_z_trivially
&&
369 sctx
->framebuffer
.state
.zsbuf
&&
371 si_emit_dpbb_disable(sctx
);
375 /* Compute the bin size. */
376 /* TODO: We could also look at enabled pixel shader outputs. */
/* Only color buffers enabled in both the framebuffer and the blend state
 * are considered.
 */
377 unsigned cb_target_enabled_4bit
= sctx
->framebuffer
.colorbuf_enabled_4bit
&
378 blend
->cb_target_enabled_4bit
;
379 struct uvec2 color_bin_size
=
380 si_get_color_bin_size(sctx
, cb_target_enabled_4bit
);
381 struct uvec2 depth_bin_size
= si_get_depth_bin_size(sctx
);
/* Compare the two candidates by area (x * y). */
383 unsigned color_area
= color_bin_size
.x
* color_bin_size
.y
;
384 unsigned depth_area
= depth_bin_size
.x
* depth_bin_size
.y
;
/* The color candidate wins when its area is smaller. The other operand of
 * the conditional is not visible here (presumably depth_bin_size).
 */
386 struct uvec2 bin_size
= color_area
< depth_area
? color_bin_size
/* A zero dimension means no usable bin size was found. */
389 if (!bin_size
.x
|| !bin_size
.y
) {
390 si_emit_dpbb_disable(sctx
);
394 /* Enable DFSM if it's preferred. */
/* Defaults used when the DFSM conditions below do not all hold. */
395 unsigned punchout_mode
= V_028060_FORCE_OFF
;
396 bool disable_start_of_prim
= true;
/* DFSM requires: the screen allows it, at least one color target is
 * enabled, KILL_ENABLE / EXEC_ON_HIER_FAIL / EXEC_ON_NOOP are all clear,
 * and Z_ORDER is EARLY_Z_THEN_LATE_Z.
 */
398 if (sscreen
->dfsm_allowed
&&
399 cb_target_enabled_4bit
&&
400 !G_02880C_KILL_ENABLE(db_shader_control
) &&
401 /* These two also imply that DFSM is disabled when PS writes to memory. */
402 !G_02880C_EXEC_ON_HIER_FAIL(db_shader_control
) &&
403 !G_02880C_EXEC_ON_NOOP(db_shader_control
) &&
404 G_02880C_Z_ORDER(db_shader_control
) == V_02880C_EARLY_Z_THEN_LATE_Z
) {
405 punchout_mode
= V_028060_AUTO
;
/* With DFSM on, start-of-prim stays disabled only if some enabled color
 * target also has blending enabled.
 */
406 disable_start_of_prim
= (cb_target_enabled_4bit
&
407 blend
->blend_enable_4bit
) != 0;
410 /* Tunable parameters. Also test with DFSM enabled/disabled. */
411 unsigned context_states_per_bin
; /* allowed range: [0, 5] */
412 unsigned persistent_states_per_bin
; /* allowed range: [0, 31] */
413 unsigned fpovs_per_batch
; /* allowed range: [0, 255], 0 = unlimited */
/* NOTE(review): the `case`/`default` labels are not visible here; the three
 * tunables above are declared without initializers, so every path through
 * this switch must assign them.
 */
415 switch (sctx
->b
.family
) {
418 /* Tuned for Raven. Vega might need different values. */
419 context_states_per_bin
= 5;
420 persistent_states_per_bin
= 31;
421 fpovs_per_batch
= 63;
427 /* Emit registers. */
/* For dimensions >= 32 the *_EXTEND field is util_logbase2(size) - 5
 * (presumably floor(log2), so 32 -> 0, 64 -> 1, ...); otherwise it stays 0.
 * NOTE(review): `= {}` is an empty initializer (GNU extension / C23).
 */
428 struct uvec2 bin_size_extend
= {};
429 if (bin_size
.x
>= 32)
430 bin_size_extend
.x
= util_logbase2(bin_size
.x
) - 5;
431 if (bin_size
.y
>= 32)
432 bin_size_extend
.y
= util_logbase2(bin_size
.y
) - 5;
434 struct radeon_winsys_cs
*cs
= sctx
->b
.gfx
.cs
;
/* BIN_SIZE_X / BIN_SIZE_Y receive the boolean (size == 16); larger sizes
 * are expressed through the *_EXTEND fields computed above.
 */
435 radeon_set_context_reg(cs
, R_028C44_PA_SC_BINNER_CNTL_0
,
436 S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED
) |
437 S_028C44_BIN_SIZE_X(bin_size
.x
== 16) |
438 S_028C44_BIN_SIZE_Y(bin_size
.y
== 16) |
439 S_028C44_BIN_SIZE_X_EXTEND(bin_size_extend
.x
) |
440 S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend
.y
) |
441 S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin
) |
442 S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin
) |
443 S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim
) |
444 S_028C44_FPOVS_PER_BATCH(fpovs_per_batch
) |
445 S_028C44_OPTIMAL_BIN_SELECTION(1));
446 radeon_set_context_reg(cs
, R_028060_DB_DFSM_CONTROL
,
447 S_028060_PUNCHOUT_MODE(punchout_mode
));