radeonsi: optimize rendering to linear color buffers
[mesa.git] / src / gallium / drivers / radeonsi / si_state.c
index 270b9fda079b1475e8e8f5fc1179553d43104527..d12c89b9401c185e770257e9418f4542d96bb268 100644 (file)
@@ -742,7 +742,6 @@ static void *si_create_rs_state(struct pipe_context *ctx,
                                S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
                                S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
        rs->pa_cl_clip_cntl =
-               S_028810_PS_UCP_MODE(3) |
                S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
                S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
                S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
@@ -1784,13 +1783,16 @@ boolean si_is_format_supported(struct pipe_screen *screen,
                }
        }
 
-       if (usage & PIPE_BIND_SAMPLER_VIEW) {
+       if (usage & (PIPE_BIND_SAMPLER_VIEW |
+                    PIPE_BIND_SHADER_IMAGE)) {
                if (target == PIPE_BUFFER) {
                        if (si_is_vertex_format_supported(screen, format))
-                               retval |= PIPE_BIND_SAMPLER_VIEW;
+                               retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
+                                                  PIPE_BIND_SHADER_IMAGE);
                } else {
                        if (si_is_sampler_format_supported(screen, format))
-                               retval |= PIPE_BIND_SAMPLER_VIEW;
+                               retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
+                                                  PIPE_BIND_SHADER_IMAGE);
                }
        }
 
@@ -2236,6 +2238,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        struct r600_surface *surf = NULL;
        struct r600_texture *rtex;
        bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer;
+       bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
        unsigned old_nr_samples = sctx->framebuffer.nr_samples;
        int i;
 
@@ -2272,6 +2275,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
        sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
                                  util_format_is_pure_integer(state->cbufs[0]->format);
+       sctx->framebuffer.any_dst_linear = false;
 
        if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
                si_mark_atom_dirty(sctx, &sctx->db_render_state);
@@ -2302,6 +2306,10 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                if (rtex->fmask.size && rtex->cmask.size) {
                        sctx->framebuffer.compressed_cb_mask |= 1 << i;
                }
+
+               if (surf->level_info->mode == RADEON_SURF_MODE_LINEAR_ALIGNED)
+                       sctx->framebuffer.any_dst_linear = true;
+
                r600_context_add_resource_size(ctx, surf->base.texture);
 
                p_atomic_inc(&rtex->framebuffers_bound);
@@ -2331,6 +2339,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        si_mark_atom_dirty(sctx, &sctx->cb_render_state);
        si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
 
+       if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
+               si_mark_atom_dirty(sctx, &sctx->msaa_config);
+
        if (sctx->framebuffer.nr_samples != old_nr_samples) {
                si_mark_atom_dirty(sctx, &sctx->msaa_config);
                si_mark_atom_dirty(sctx, &sctx->db_render_state);
@@ -2443,8 +2454,16 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                }
 
                cb_color_info = cb->cb_color_info | tex->cb_color_info;
-               if (tex->dcc_offset && cb->level_info->dcc_enabled)
-                       cb_color_info |= S_028C70_DCC_ENABLE(1);
+
+               if (tex->dcc_offset && cb->level_info->dcc_enabled) {
+                       bool is_msaa_resolve_dst = state->cbufs[0] &&
+                                                  state->cbufs[0]->texture->nr_samples > 1 &&
+                                                  state->cbufs[1] == &cb->base &&
+                                                  state->cbufs[1]->texture->nr_samples <= 1;
+
+                       if (!is_msaa_resolve_dst)
+                               cb_color_info |= S_028C70_DCC_ENABLE(1);
+               }
 
                radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
                                           sctx->b.chip_class >= VI ? 14 : 13);
@@ -2545,13 +2564,27 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx,
 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+       unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes;
+       /* Linear targets: WALK_SIZE on, walk fence off (33% faster rendering to linear color buffers) */
+       bool dst_is_linear = sctx->framebuffer.any_dst_linear; /* set by si_set_framebuffer_state */
+       unsigned sc_mode_cntl_1 =
+               S_028A4C_WALK_SIZE(dst_is_linear) | /* 1 only when a linear target is bound */
+               S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) | /* fence stays enabled otherwise */
+               S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
+               /* the remaining fields are always set to 1: */
+               S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
+               S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
+               S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
+               S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
+               S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
+               S_028A4C_FORCE_EOV_REZ_ENABLE(1);
 
        cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
                                sctx->ps_iter_samples,
-                               sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0);
+                               sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
+                               sc_mode_cntl_1); /* walker configuration assembled above */
 }
 
-
 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
 {
        struct si_context *sctx = (struct si_context *)ctx;
@@ -3511,10 +3544,15 @@ si_write_harvested_raster_configs(struct si_context *sctx,
        unsigned se_mask[4];
        unsigned se;
 
-       se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
-       se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
-       se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
-       se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+       se_mask[0] = ((1 << rb_per_se) - 1);
+       se_mask[1] = (se_mask[0] << rb_per_se);
+       se_mask[2] = (se_mask[1] << rb_per_se);
+       se_mask[3] = (se_mask[2] << rb_per_se);
+
+       se_mask[0] &= rb_mask;
+       se_mask[1] &= rb_mask;
+       se_mask[2] &= rb_mask;
+       se_mask[3] &= rb_mask;
 
        assert(num_se == 1 || num_se == 2 || num_se == 4);
        assert(sh_per_se == 1 || sh_per_se == 2);
@@ -3524,19 +3562,6 @@ si_write_harvested_raster_configs(struct si_context *sctx,
         * fields are for, so I'm leaving them as their default
         * values. */
 
-       if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
-                            (!se_mask[2] && !se_mask[3]))) {
-               raster_config_1 &= C_028354_SE_PAIR_MAP;
-
-               if (!se_mask[0] && !se_mask[1]) {
-                       raster_config_1 |=
-                               S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
-               } else {
-                       raster_config_1 |=
-                               S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
-               }
-       }
-
        for (se = 0; se < num_se; se++) {
                unsigned raster_config_se = raster_config;
                unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
@@ -3616,8 +3641,6 @@ si_write_harvested_raster_configs(struct si_context *sctx,
                                       S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
                                       S_030800_INSTANCE_BROADCAST_WRITES(1));
                si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
-               if (sctx->b.chip_class >= CIK)
-                       si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
        }
 
        /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
@@ -3625,10 +3648,26 @@ si_write_harvested_raster_configs(struct si_context *sctx,
                si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
                               SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
                               INSTANCE_BROADCAST_WRITES);
-       else
+       else {
                si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
                               S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
                               S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+               if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
+                                    (!se_mask[2] && !se_mask[3]))) {
+                       raster_config_1 &= C_028354_SE_PAIR_MAP;
+
+                       if (!se_mask[0] && !se_mask[1]) {
+                               raster_config_1 |=
+                                       S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
+                       } else {
+                               raster_config_1 |=
+                                       S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
+                       }
+               }
+
+               si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
+       }
 }
 
 static void si_init_config(struct si_context *sctx)
@@ -3822,16 +3861,28 @@ static void si_init_config(struct si_context *sctx)
        }
 
        if (sctx->b.chip_class >= VI) {
+               unsigned vgt_tess_distribution;
+
                si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
                               S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
                               S_028424_OVERWRITE_COMBINER_WATERMARK(4));
                si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
                si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
-               si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
-                              S_028B50_ACCUM_ISOLINE(32) |
-                              S_028B50_ACCUM_TRI(11) |
-                              S_028B50_ACCUM_QUAD(11) |
-                              S_028B50_DONUT_SPLIT(16));
+
+               vgt_tess_distribution =
+                       S_028B50_ACCUM_ISOLINE(32) |
+                       S_028B50_ACCUM_TRI(11) |
+                       S_028B50_ACCUM_QUAD(11) |
+                       S_028B50_DONUT_SPLIT(16);
+
+               /* Testing with Unigine Heaven extreme tessellation yielded best results
+                * with TRAP_SPLIT = 3.
+                */
+               if (sctx->b.family == CHIP_FIJI ||
+                   sctx->b.family >= CHIP_POLARIS10)
+                       vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
+
+               si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
        }
 
        if (sctx->b.family == CHIP_STONEY)