gallium/radeon: add a heuristic enabling DCC for scanout surfaces (v2)
[mesa.git] / src / gallium / drivers / radeonsi / si_state.c
index ccae571cfd442b018c0bb6d6f065803ef0db8522..26831faa10736cbc2da97124a8a657b7d6f5fb5c 100644 (file)
@@ -810,20 +810,24 @@ static void *si_create_rs_state(struct pipe_context *ctx,
                float offset_scale = state->offset_scale * 16.0f;
                uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
 
-               switch (i) {
-               case 0: /* 16-bit zbuffer */
-                       offset_units *= 4.0f;
-                       pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
-                       break;
-               case 1: /* 24-bit zbuffer */
-                       offset_units *= 2.0f;
-                       pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
-                       break;
-               case 2: /* 32-bit zbuffer */
-                       offset_units *= 1.0f;
-                       pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
-                                                       S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
-                       break;
+               if (!state->offset_units_unscaled) {
+                       switch (i) {
+                       case 0: /* 16-bit zbuffer */
+                               offset_units *= 4.0f;
+                               pa_su_poly_offset_db_fmt_cntl =
+                                       S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
+                               break;
+                       case 1: /* 24-bit zbuffer */
+                               offset_units *= 2.0f;
+                               pa_su_poly_offset_db_fmt_cntl =
+                                       S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
+                               break;
+                       case 2: /* 32-bit zbuffer */
+                               offset_units *= 1.0f;
+                               pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
+                                                               S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
+                               break;
+                       }
                }
 
                si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
@@ -1286,7 +1290,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
        bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
                                          sscreen->b.info.drm_minor >= 31) ||
                                         sscreen->b.info.drm_major == 3;
-       boolean uniform = TRUE;
+       bool uniform = true;
        int i;
 
        /* Colorspace (return non-RGB formats directly). */
@@ -1747,7 +1751,7 @@ static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_
 static bool si_is_colorbuffer_format_supported(enum pipe_format format)
 {
        return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
-               r600_translate_colorswap(format, FALSE) != ~0U;
+               r600_translate_colorswap(format, false) != ~0U;
 }
 
 static bool si_is_zs_format_supported(enum pipe_format format)
@@ -1755,25 +1759,28 @@ static bool si_is_zs_format_supported(enum pipe_format format)
        return si_translate_dbformat(format) != V_028040_Z_INVALID;
 }
 
-boolean si_is_format_supported(struct pipe_screen *screen,
-                               enum pipe_format format,
-                               enum pipe_texture_target target,
-                               unsigned sample_count,
-                               unsigned usage)
+static boolean si_is_format_supported(struct pipe_screen *screen,
+                                     enum pipe_format format,
+                                     enum pipe_texture_target target,
+                                     unsigned sample_count,
+                                     unsigned usage)
 {
        unsigned retval = 0;
 
        if (target >= PIPE_MAX_TEXTURE_TYPES) {
                R600_ERR("r600: unsupported texture type %d\n", target);
-               return FALSE;
+               return false;
        }
 
        if (!util_format_is_supported(format, usage))
-               return FALSE;
+               return false;
 
        if (sample_count > 1) {
                if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
-                       return FALSE;
+                       return false;
+
+               if (usage & PIPE_BIND_SHADER_IMAGE)
+                       return false;
 
                switch (sample_count) {
                case 2:
@@ -1782,11 +1789,11 @@ boolean si_is_format_supported(struct pipe_screen *screen,
                        break;
                case 16:
                        if (format == PIPE_FORMAT_NONE)
-                               return TRUE;
+                               return true;
                        else
-                               return FALSE;
+                               return false;
                default:
-                       return FALSE;
+                       return false;
                }
        }
 
@@ -2010,7 +2017,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
                R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
        }
        assert(format != V_028C70_COLOR_INVALID);
-       swap = r600_translate_colorswap(surf->base.format, FALSE);
+       swap = r600_translate_colorswap(surf->base.format, false);
        endian = si_colorformat_endian_swap(format);
 
        /* blend clamp should be set for all NORM/SRGB types */
@@ -2202,7 +2209,7 @@ static void si_init_depth_surface(struct si_context *sctx,
        surf->depth_initialized = true;
 }
 
-void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
+static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
 {
        for (int i = 0; i < state->nr_cbufs; ++i) {
                struct r600_surface *surf = NULL;
@@ -2229,6 +2236,15 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        unsigned old_nr_samples = sctx->framebuffer.nr_samples;
        int i;
 
+       for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+               if (!sctx->framebuffer.state.cbufs[i])
+                       continue;
+
+               rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
+               if (rtex->dcc_gather_statistics)
+                       vi_separate_dcc_stop_query(ctx, rtex);
+       }
+
        /* Only flush TC when changing the framebuffer state, because
         * the only client not using TC that can change textures is
         * the framebuffer.
@@ -2300,6 +2316,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                r600_context_add_resource_size(ctx, surf->base.texture);
 
                p_atomic_inc(&rtex->framebuffers_bound);
+
+               if (rtex->dcc_gather_statistics) {
+                       /* Dirty tracking must be enabled for DCC usage analysis. */
+                       sctx->framebuffer.compressed_cb_mask |= 1 << i;
+                       vi_separate_dcc_start_query(ctx, rtex);
+               }
        }
        /* Set the second SPI format for possible dual-src blending. */
        if (i == 1 && surf) {
@@ -2413,6 +2435,12 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                                RADEON_PRIO_CMASK);
                }
 
+               if (tex->dcc_separate_buffer)
+                       radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+                                                 tex->dcc_separate_buffer,
+                                                 RADEON_USAGE_READWRITE,
+                                                 RADEON_PRIO_DCC);
+
                /* Compute mutable surface parameters. */
                pitch_tile_max = cb->level_info->nblk_x / 8 - 1;
                slice_tile_max = cb->level_info->nblk_x *
@@ -2469,7 +2497,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                radeon_emit(cs, tex->color_clear_value[1]);     /* R_028C90_CB_COLOR0_CLEAR_WORD1 */
 
                if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
-                       radeon_emit(cs, (tex->resource.gpu_address +
+                       radeon_emit(cs, ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
                                         tex->dcc_offset +
                                         tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8);
        }
@@ -2794,7 +2822,7 @@ si_make_texture_descriptor(struct si_screen *screen,
        state[7] = 0;
 
        if (tex->dcc_offset) {
-               unsigned swap = r600_translate_colorswap(pipe_format, FALSE);
+               unsigned swap = r600_translate_colorswap(pipe_format, false);
 
                state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
        } else {
@@ -3440,6 +3468,7 @@ static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
        if (rscreen->info.drm_major != 3)
                return;
 
+       assert(rtex->dcc_separate_buffer == NULL);
        assert(rtex->fmask.size == 0);
 
        /* Metadata image format format version 1:
@@ -3510,6 +3539,7 @@ static void si_apply_opaque_metadata(struct r600_common_screen *rscreen,
 
 void si_init_screen_state_functions(struct si_screen *sscreen)
 {
+       sscreen->b.b.is_format_supported = si_is_format_supported;
        sscreen->b.query_opaque_metadata = si_query_opaque_metadata;
        sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata;
 }
@@ -3800,7 +3830,15 @@ static void si_init_config(struct si_context *sctx)
                       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
 
        si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
-       si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
+       si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
+                      S_028230_ER_TRI(0xA) |
+                      S_028230_ER_POINT(0xA) |
+                      S_028230_ER_RECT(0xA) |
+                      /* Required by DX10_DIAMOND_TEST_ENA: */
+                      S_028230_ER_LINE_LR(0x1A) |
+                      S_028230_ER_LINE_RL(0x26) |
+                      S_028230_ER_LINE_TB(0xA) |
+                      S_028230_ER_LINE_BT(0xA));
        /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
        si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
        si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
@@ -3816,6 +3854,7 @@ static void si_init_config(struct si_context *sctx)
        si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
 
        if (sctx->b.chip_class >= CIK) {
+               si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
                si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
                si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
                si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
@@ -3828,7 +3867,6 @@ static void si_init_config(struct si_context *sctx)
                         *
                         * LATE_ALLOC_VS = 2 is the highest safe number.
                         */
-                       si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
                        si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
                        si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
                } else {
@@ -3837,7 +3875,6 @@ static void si_init_config(struct si_context *sctx)
                         * - VS can't execute on CU0.
                         * - If HS writes outputs to LDS, LS can't execute on CU0.
                         */
-                       si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
                        si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
                        si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
                }
@@ -3877,6 +3914,11 @@ static void si_init_config(struct si_context *sctx)
        if (sctx->b.family == CHIP_STONEY)
                si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
 
+       if (sctx->b.family >= CHIP_POLARIS10)
+               si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
+                              S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
+                              S_028830_LINE_FILTER_DISABLE(1)); /* line bug */
+
        si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
        if (sctx->b.chip_class >= CIK)
                si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);