nir: Add lower_rotate flag and set to true in all drivers
[mesa.git] / src / gallium / drivers / radeonsi / si_state_binning.c
index 2605b59bbef004d19b8caac9707ae39bd87499eb..a6b1830b661b160679f26dbf7d2a92001e2dd9a9 100644 (file)
@@ -25,7 +25,7 @@
 /* This file handles register programming of primitive binning. */
 
 #include "si_build_pm4.h"
-#include "gfx9d.h"
+#include "sid.h"
 
 struct uvec2 {
        unsigned x, y;
@@ -37,7 +37,7 @@ struct si_bin_size_map {
        unsigned bin_size_y;
 };
 
-typedef struct si_bin_size_map si_bin_size_subtable[3][9];
+typedef struct si_bin_size_map si_bin_size_subtable[3][10];
 
 /* Find the bin size where sum is >= table[i].start and < table[i + 1].start. */
 static struct uvec2 si_find_bin_size(struct si_screen *sscreen,
@@ -54,7 +54,7 @@ static struct uvec2 si_find_bin_size(struct si_screen *sscreen,
        const struct si_bin_size_map *subtable =
                &table[log_num_rb_per_se][log_num_se][0];
 
-       for (i = 0; subtable[i].start != UINT_MAX; i++) {
+       for (i = 0; subtable[i].bin_size_x != 0; i++) {
                if (sum >= subtable[i].start && sum < subtable[i + 1].start)
                        break;
        }
@@ -66,7 +66,7 @@ static struct uvec2 si_find_bin_size(struct si_screen *sscreen,
 static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
                                          unsigned cb_target_enabled_4bit)
 {
-       unsigned nr_samples = sctx->framebuffer.nr_samples;
+       unsigned num_fragments = sctx->framebuffer.nr_color_samples;
        unsigned sum = 0;
 
        /* Compute the sum of all Bpp. */
@@ -74,15 +74,15 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
                if (!(cb_target_enabled_4bit & (0xf << (i * 4))))
                        continue;
 
-               struct r600_texture *rtex =
-                       (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
-               sum += rtex->surface.bpe;
+               struct si_texture *tex =
+                       (struct si_texture*)sctx->framebuffer.state.cbufs[i]->texture;
+               sum += tex->surface.bpe;
        }
 
        /* Multiply the sum by some function of the number of samples. */
-       if (nr_samples >= 2) {
+       if (num_fragments >= 2) {
                if (si_get_ps_iter_samples(sctx) >= 2)
-                       sum *= nr_samples;
+                       sum *= num_fragments;
                else
                        sum *= 2;
        }
@@ -97,7 +97,6 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
                                {        2,   32,  128 },
                                {        3,   16,  128 },
                                {       17,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                        {
                                /* Two shader engines */
@@ -106,7 +105,6 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
                                {        3,   32,  128 },
                                {        5,   16,  128 },
                                {       17,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                        {
                                /* Four shader engines */
@@ -114,7 +112,6 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
                                {        3,   64,  128 },
                                {        5,   16,  128 },
                                {       17,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                },
                {
@@ -124,9 +121,8 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
                                {        0,  128,  128 },
                                {        2,   64,  128 },
                                {        3,   32,  128 },
-                               {        5,   16,  128 },
+                               {        9,   16,  128 },
                                {       33,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                        {
                                /* Two shader engines */
@@ -135,7 +131,6 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
                                {        5,   32,  128 },
                                {        9,   16,  128 },
                                {       33,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                        {
                                /* Four shader engines */
@@ -145,7 +140,6 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
                                {        5,   64,  128 },
                                {        9,   16,  128 },
                                {       33,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                },
                {
@@ -157,8 +151,7 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
                                {        3,   64,  128 },
                                {        5,   32,  128 },
                                {        9,   16,  128 },
-                               {       33,    0,    0 },
-                               { UINT_MAX,    0,    0 },
+                               {       17,    0,    0 },
                        },
                        {
                                /* Two shader engines */
@@ -169,18 +162,16 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
                                {        9,   32,  128 },
                                {       17,   16,  128 },
                                {       33,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                        {
                                /* Four shader engines */
                                {        0,  256,  512 },
-                               {        2,  256,  256 },
-                               {        3,  128,  256 },
-                               {        5,  128,  128 },
-                               {        9,   64,  128 },
-                               {       17,   16,  128 },
+                               {        2,  128,  512 },
+                               {        3,   64,  512 },
+                               {        5,   32,  512 },
+                               {        9,   32,  256 },
+                               {       17,   32,  128 },
                                {       33,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                },
        };
@@ -199,86 +190,80 @@ static struct uvec2 si_get_depth_bin_size(struct si_context *sctx)
                return size;
        }
 
-       struct r600_texture *rtex =
-               (struct r600_texture*)sctx->framebuffer.state.zsbuf->texture;
+       struct si_texture *tex =
+               (struct si_texture*)sctx->framebuffer.state.zsbuf->texture;
        unsigned depth_coeff = dsa->depth_enabled ? 5 : 0;
-       unsigned stencil_coeff = rtex->surface.has_stencil &&
+       unsigned stencil_coeff = tex->surface.has_stencil &&
                                 dsa->stencil_enabled ? 1 : 0;
        unsigned sum = 4 * (depth_coeff + stencil_coeff) *
-                      sctx->framebuffer.nr_samples;
+                      tex->buffer.b.b.nr_samples;
 
        static const si_bin_size_subtable table[] = {
                {
                        // One RB / SE
                        {
                                // One shader engine
-                               {        0,  128,  256 },
-                               {        2,  128,  128 },
+                               {        0,   64,  512 },
+                               {        2,   64,  256 },
                                {        4,   64,  128 },
                                {        7,   32,  128 },
                                {       13,   16,  128 },
                                {       49,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                        {
                                // Two shader engines
-                               {        0,  256,  256 },
-                               {        2,  128,  256 },
-                               {        4,  128,  128 },
+                               {        0,  128,  512 },
+                               {        2,   64,  512 },
+                               {        4,   64,  256 },
                                {        7,   64,  128 },
                                {       13,   32,  128 },
                                {       25,   16,  128 },
                                {       49,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                        {
                                // Four shader engines
                                {        0,  256,  512 },
-                               {        2,  256,  256 },
-                               {        4,  128,  256 },
-                               {        7,  128,  128 },
+                               {        2,  128,  512 },
+                               {        4,   64,  512 },
+                               {        7,   64,  256 },
                                {       13,   64,  128 },
                                {       25,   16,  128 },
                                {       49,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                },
                {
                        // Two RB / SE
                        {
                                // One shader engine
-                               {        0,  256,  256 },
-                               {        2,  128,  256 },
-                               {        4,  128,  128 },
+                               {        0,  128,  512 },
+                               {        2,   64,  512 },
+                               {        4,   64,  256 },
                                {        7,   64,  128 },
                                {       13,   32,  128 },
                                {       25,   16,  128 },
                                {       97,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                        {
                                // Two shader engines
                                {        0,  256,  512 },
-                               {        2,  256,  256 },
-                               {        4,  128,  256 },
-                               {        7,  128,  128 },
+                               {        2,  128,  512 },
+                               {        4,   64,  512 },
+                               {        7,   64,  256 },
                                {       13,   64,  128 },
                                {       25,   32,  128 },
                                {       49,   16,  128 },
                                {       97,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                        {
                                // Four shader engines
                                {        0,  512,  512 },
                                {        2,  256,  512 },
-                               {        4,  256,  256 },
-                               {        7,  128,  256 },
-                               {       13,  128,  128 },
+                               {        4,  128,  512 },
+                               {        7,   64,  512 },
+                               {       13,   64,  256 },
                                {       25,   64,  128 },
                                {       49,   16,  128 },
                                {       97,    0,    0 },
-                               { UINT_MAX,    0,    0 },
                        },
                },
                {
@@ -286,36 +271,36 @@ static struct uvec2 si_get_depth_bin_size(struct si_context *sctx)
                        {
                                // One shader engine
                                {        0,  256,  512 },
-                               {        2,  256,  256 },
-                               {        4,  128,  256 },
-                               {        7,  128,  128 },
+                               {        2,  128,  512 },
+                               {        4,   64,  512 },
+                               {        7,   64,  256 },
                                {       13,   64,  128 },
                                {       25,   32,  128 },
                                {       49,   16,  128 },
-                               { UINT_MAX,    0,    0 },
+                               {      193,    0,    0 },
                        },
                        {
                                // Two shader engines
                                {        0,  512,  512 },
                                {        2,  256,  512 },
-                               {        4,  256,  256 },
-                               {        7,  128,  256 },
-                               {       13,  128,  128 },
+                               {        4,  128,  512 },
+                               {        7,   64,  512 },
+                               {       13,   64,  256 },
                                {       25,   64,  128 },
                                {       49,   32,  128 },
                                {       97,   16,  128 },
-                               { UINT_MAX,    0,    0 },
+                               {      193,    0,    0 },
                        },
                        {
                                // Four shader engines
                                {        0,  512,  512 },
                                {        4,  256,  512 },
-                               {        7,  256,  256 },
-                               {       13,  128,  256 },
-                               {       25,  128,  128 },
-                               {       49,   64,  128 },
+                               {        7,  128,  512 },
+                               {       13,   64,  512 },
+                               {       25,   32,  512 },
+                               {       49,   32,  256 },
                                {       97,   16,  128 },
-                               { UINT_MAX,    0,    0 },
+                               {      193,    0,    0 },
                        },
                },
        };
@@ -325,13 +310,18 @@ static struct uvec2 si_get_depth_bin_size(struct si_context *sctx)
 
 static void si_emit_dpbb_disable(struct si_context *sctx)
 {
-       struct radeon_winsys_cs *cs = sctx->gfx_cs;
-
-       radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
-                              S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
-                              S_028C44_DISABLE_START_OF_PRIM(1));
-       radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL,
-                              S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
+       unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
+       radeon_opt_set_context_reg(sctx, R_028C44_PA_SC_BINNER_CNTL_0,
+               SI_TRACKED_PA_SC_BINNER_CNTL_0,
+               S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
+               S_028C44_DISABLE_START_OF_PRIM(1));
+       radeon_opt_set_context_reg(sctx, R_028060_DB_DFSM_CONTROL,
+                                  SI_TRACKED_DB_DFSM_CONTROL,
+                                  S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
+                                  S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
+       if (initial_cdw != sctx->gfx_cs->current.cdw)
+               sctx->context_roll = true;
 }
 
 void si_emit_dpbb_state(struct si_context *sctx)
@@ -353,18 +343,14 @@ void si_emit_dpbb_state(struct si_context *sctx)
                           G_02880C_COVERAGE_TO_MASK_ENABLE(db_shader_control) ||
                           blend->alpha_to_coverage;
 
-       /* This is ported from Vulkan, but it doesn't make much sense to me.
-        * Maybe it's for RE-Z? But Vulkan doesn't use RE-Z. TODO: Clarify this.
-        */
-       bool ps_can_reject_z_trivially =
+       bool db_can_reject_z_trivially =
                !G_02880C_Z_EXPORT_ENABLE(db_shader_control) ||
-               G_02880C_CONSERVATIVE_Z_EXPORT(db_shader_control);
+               G_02880C_CONSERVATIVE_Z_EXPORT(db_shader_control) ||
+               G_02880C_DEPTH_BEFORE_SHADER(db_shader_control);
 
-       /* Disable binning if PS can kill trivially with DB writes.
-        * Ported from Vulkan. (heuristic?)
-        */
+       /* Disable DPBB when it's believed to be inefficient. */
        if (ps_can_kill &&
-           ps_can_reject_z_trivially &&
+           db_can_reject_z_trivially &&
            sctx->framebuffer.state.zsbuf &&
            dsa->db_can_write) {
                si_emit_dpbb_disable(sctx);
@@ -393,8 +379,13 @@ void si_emit_dpbb_state(struct si_context *sctx)
        /* Enable DFSM if it's preferred. */
        unsigned punchout_mode = V_028060_FORCE_OFF;
        bool disable_start_of_prim = true;
+       bool zs_eqaa_dfsm_bug = sctx->chip_class == GFX9 &&
+                               sctx->framebuffer.state.zsbuf &&
+                               sctx->framebuffer.nr_samples !=
+                               MAX2(1, sctx->framebuffer.state.zsbuf->texture->nr_samples);
 
        if (sscreen->dfsm_allowed &&
+           !zs_eqaa_dfsm_bug &&
            cb_target_enabled_4bit &&
            !G_02880C_KILL_ENABLE(db_shader_control) &&
            /* These two also imply that DFSM is disabled when PS writes to memory. */
@@ -411,18 +402,10 @@ void si_emit_dpbb_state(struct si_context *sctx)
        unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
        unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
 
-       switch (sctx->family) {
-       case CHIP_VEGA10:
-       case CHIP_VEGA12:
-       case CHIP_RAVEN:
-               /* Tuned for Raven. Vega might need different values. */
-               context_states_per_bin = 5;
-               persistent_states_per_bin = 31;
-               fpovs_per_batch = 63;
-               break;
-       default:
-               assert(0);
-       }
+       /* Tuned for Raven. Vega might need different values. */
+       context_states_per_bin = 5;
+       persistent_states_per_bin = 31;
+       fpovs_per_batch = 63;
 
        /* Emit registers. */
        struct uvec2 bin_size_extend = {};
@@ -431,18 +414,24 @@ void si_emit_dpbb_state(struct si_context *sctx)
        if (bin_size.y >= 32)
                bin_size_extend.y = util_logbase2(bin_size.y) - 5;
 
-       struct radeon_winsys_cs *cs = sctx->gfx_cs;
-       radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
-                              S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
-                              S_028C44_BIN_SIZE_X(bin_size.x == 16) |
-                              S_028C44_BIN_SIZE_Y(bin_size.y == 16) |
-                              S_028C44_BIN_SIZE_X_EXTEND(bin_size_extend.x) |
-                              S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend.y) |
-                              S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin) |
-                              S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin) |
-                              S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
-                              S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
-                              S_028C44_OPTIMAL_BIN_SELECTION(1));
-       radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL,
-                              S_028060_PUNCHOUT_MODE(punchout_mode));
+       unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+       radeon_opt_set_context_reg(
+               sctx, R_028C44_PA_SC_BINNER_CNTL_0,
+               SI_TRACKED_PA_SC_BINNER_CNTL_0,
+               S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
+               S_028C44_BIN_SIZE_X(bin_size.x == 16) |
+               S_028C44_BIN_SIZE_Y(bin_size.y == 16) |
+               S_028C44_BIN_SIZE_X_EXTEND(bin_size_extend.x) |
+               S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend.y) |
+               S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin) |
+               S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin) |
+               S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
+               S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
+               S_028C44_OPTIMAL_BIN_SELECTION(1));
+       radeon_opt_set_context_reg(sctx, R_028060_DB_DFSM_CONTROL,
+                                  SI_TRACKED_DB_DFSM_CONTROL,
+                                  S_028060_PUNCHOUT_MODE(punchout_mode) |
+                                  S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
+       if (initial_cdw != sctx->gfx_cs->current.cdw)
+               sctx->context_roll = true;
 }