radeonsi: use all SPI color formats
author Marek Olšák <marek.olsak@amd.com>
Fri, 15 Jan 2016 13:40:19 +0000 (14:40 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 22 Jan 2016 14:02:40 +0000 (15:02 +0100)
because not using SPI_SHADER_32_ABGR doubles fill rate.

We should also get optimal performance if alpha isn't needed or blending
isn't enabled.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index f3271e2ee6caded38665a0d87946daf4f3543c92..d66e74f92541775fb923f2abece834890746c57c 100644 (file)
@@ -236,6 +236,7 @@ struct r600_surface {
        /* Misc. color flags. */
        bool alphatest_bypass;
        bool export_16bpc;
+       bool color_is_int8;
 
        /* Color registers. */
        unsigned cb_color_info;
@@ -252,7 +253,10 @@ struct r600_surface {
        unsigned cb_color_fmask_slice;  /* EG and later */
        unsigned cb_color_cmask;        /* CB_COLORn_TILE (r600 only) */
        unsigned cb_color_mask;         /* R600 only */
-       unsigned spi_shader_col_format; /* SI+ */
+       unsigned spi_shader_col_format;         /* SI+, no blending, no alpha-to-coverage. */
+       unsigned spi_shader_col_format_alpha;   /* SI+, alpha-to-coverage */
+       unsigned spi_shader_col_format_blend;   /* SI+, blending without alpha. */
+       unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
        unsigned sx_ps_downconvert;     /* Stoney only */
        unsigned sx_blend_opt_epsilon;  /* Stoney only */
        struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
index 75a9d56d1103c676170325357d8932a75823919c..a93887ec2716d61658323608c463f3efb4b31c9a 100644 (file)
@@ -680,6 +680,14 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
        enum pipe_format format = int_to_norm_format(info->dst.format);
        unsigned sample_mask = ~0;
 
+       /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
+        * the format is R16G16. Use R16A16, which does work.
+        */
+       if (format == PIPE_FORMAT_R16G16_UNORM)
+               format = PIPE_FORMAT_R16A16_UNORM;
+       if (format == PIPE_FORMAT_R16G16_SNORM)
+               format = PIPE_FORMAT_R16A16_SNORM;
+
        if (info->src.resource->nr_samples > 1 &&
            info->dst.resource->nr_samples <= 1 &&
            util_max_layer(info->src.resource, 0) == 0 &&
index e2009de9870e2e87e89335c23d73433db4051eb2..e2725fe36792d1d53cba7b16af5a1c42e9734416 100644 (file)
@@ -126,6 +126,10 @@ struct si_framebuffer {
        unsigned                        cb0_is_integer;
        unsigned                        compressed_cb_mask;
        unsigned                        spi_shader_col_format;
+       unsigned                        spi_shader_col_format_alpha;
+       unsigned                        spi_shader_col_format_blend;
+       unsigned                        spi_shader_col_format_blend_alpha;
+       unsigned                        color_is_int8; /* bitmask */
        unsigned                        dirty_cbufs;
        bool                            dirty_zsbuf;
 };
index af6759ef19cd77a44f453343368cfb619844bd55..a3ddee8b42ca7708dd16cd5a76e6a4680bb804ed 100644 (file)
@@ -420,6 +420,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
                       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
                       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
 
+       if (state->alpha_to_coverage)
+               blend->need_src_alpha_4bit |= 0xf;
+
        blend->cb_target_mask = 0;
        for (int i = 0; i < 8; i++) {
                /* state->rt entries > 0 only written if independent blending */
@@ -457,6 +460,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
                        blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
                }
                si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
+
+               blend->blend_enable_4bit |= 0xf << (i * 4);
+
+               /* This is only important for formats without alpha. */
+               if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+                   dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+                   srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+                   dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+                   srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+                   dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
+                       blend->need_src_alpha_4bit |= 0xf << (i * 4);
        }
 
        if (blend->cb_target_mask) {
@@ -1270,53 +1284,6 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
        }
 }
 
-/* Returns the size in bits of the widest component of a CB format */
-static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
-{
-       switch(colorformat) {
-       case V_028C70_COLOR_4_4_4_4:
-               return 4;
-
-       case V_028C70_COLOR_1_5_5_5:
-       case V_028C70_COLOR_5_5_5_1:
-               return 5;
-
-       case V_028C70_COLOR_5_6_5:
-               return 6;
-
-       case V_028C70_COLOR_8:
-       case V_028C70_COLOR_8_8:
-       case V_028C70_COLOR_8_8_8_8:
-               return 8;
-
-       case V_028C70_COLOR_10_10_10_2:
-       case V_028C70_COLOR_2_10_10_10:
-               return 10;
-
-       case V_028C70_COLOR_10_11_11:
-       case V_028C70_COLOR_11_11_10:
-               return 11;
-
-       case V_028C70_COLOR_16:
-       case V_028C70_COLOR_16_16:
-       case V_028C70_COLOR_16_16_16_16:
-               return 16;
-
-       case V_028C70_COLOR_8_24:
-       case V_028C70_COLOR_24_8:
-               return 24;
-
-       case V_028C70_COLOR_32:
-       case V_028C70_COLOR_32_32:
-       case V_028C70_COLOR_32_32_32_32:
-       case V_028C70_COLOR_X24_8_32_FLOAT:
-               return 32;
-       }
-
-       assert(!"Unknown maximum component size");
-       return 0;
-}
-
 static uint32_t si_translate_dbformat(enum pipe_format format)
 {
        switch (format) {
@@ -1886,17 +1853,119 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool sten
 
 static void si_choose_spi_color_formats(struct r600_surface *surf,
                                        unsigned format, unsigned swap,
-                                       unsigned ntype)
+                                       unsigned ntype, bool is_depth)
 {
-       unsigned max_comp_size = si_colorformat_max_comp_size(format);
+       /* Alpha is needed for alpha-to-coverage.
+        * Blending may be with or without alpha.
+        */
+       unsigned normal = 0; /* most optimal, may not support blending or export alpha */
+       unsigned alpha = 0; /* exports alpha, but may not support blending */
+       unsigned blend = 0; /* supports blending, but may not export alpha */
+       unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
 
-       surf->spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
+       /* Choose the SPI color formats. These are required values for Stoney/RB+.
+        * Other chips have multiple choices, though they are not necessarily better.
+        */
+       switch (format) {
+       case V_028C70_COLOR_5_6_5:
+       case V_028C70_COLOR_1_5_5_5:
+       case V_028C70_COLOR_5_5_5_1:
+       case V_028C70_COLOR_4_4_4_4:
+       case V_028C70_COLOR_10_11_11:
+       case V_028C70_COLOR_11_11_10:
+       case V_028C70_COLOR_8:
+       case V_028C70_COLOR_8_8:
+       case V_028C70_COLOR_8_8_8_8:
+       case V_028C70_COLOR_10_10_10_2:
+       case V_028C70_COLOR_2_10_10_10:
+               if (ntype == V_028C70_NUMBER_UINT)
+                       alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
+               else if (ntype == V_028C70_NUMBER_SINT)
+                       alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
+               else
+                       alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
+               break;
+
+       case V_028C70_COLOR_16:
+       case V_028C70_COLOR_16_16:
+       case V_028C70_COLOR_16_16_16_16:
+               if (ntype == V_028C70_NUMBER_UNORM ||
+                   ntype == V_028C70_NUMBER_SNORM) {
+                       /* UNORM16 and SNORM16 don't support blending */
+                       if (ntype == V_028C70_NUMBER_UNORM)
+                               normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
+                       else
+                               normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
+
+                       /* Use 32 bits per channel for blending. */
+                       if (format == V_028C70_COLOR_16) {
+                               if (swap == V_028C70_SWAP_STD) { /* R */
+                                       blend = V_028714_SPI_SHADER_32_R;
+                                       blend_alpha = V_028714_SPI_SHADER_32_AR;
+                               } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
+                                       blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
+                               else
+                                       assert(0);
+                       } else if (format == V_028C70_COLOR_16_16) {
+                               if (swap == V_028C70_SWAP_STD) { /* RG */
+                                       blend = V_028714_SPI_SHADER_32_GR;
+                                       blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+                               } else if (swap == V_028C70_SWAP_ALT) /* RA */
+                                       blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
+                               else
+                                       assert(0);
+                       } else /* 16_16_16_16 */
+                               blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+               } else if (ntype == V_028C70_NUMBER_UINT)
+                       alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
+               else if (ntype == V_028C70_NUMBER_SINT)
+                       alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
+               else if (ntype == V_028C70_NUMBER_FLOAT)
+                       alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
+               else
+                       assert(0);
+               break;
 
-       if (ntype == V_028C70_NUMBER_SRGB ||
-           ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
-            max_comp_size <= 10) ||
-           (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16))
-               surf->spi_shader_col_format = V_028714_SPI_SHADER_FP16_ABGR;
+       case V_028C70_COLOR_32:
+               if (swap == V_028C70_SWAP_STD) { /* R */
+                       blend = normal = V_028714_SPI_SHADER_32_R;
+                       alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
+               } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
+                       alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
+               else
+                       assert(0);
+               break;
+
+       case V_028C70_COLOR_32_32:
+               if (swap == V_028C70_SWAP_STD) { /* RG */
+                       blend = normal = V_028714_SPI_SHADER_32_GR;
+                       alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+               } else if (swap == V_028C70_SWAP_ALT) /* RA */
+                       alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
+               else
+                       assert(0);
+               break;
+
+       case V_028C70_COLOR_32_32_32_32:
+       case V_028C70_COLOR_8_24:
+       case V_028C70_COLOR_24_8:
+       case V_028C70_COLOR_X24_8_32_FLOAT:
+               alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
+               break;
+
+       default:
+               assert(0);
+               return;
+       }
+
+       /* The DB->CB copy needs 32_ABGR. */
+       if (is_depth)
+               alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
+
+       surf->spi_shader_col_format = normal;
+       surf->spi_shader_col_format_alpha = alpha;
+       surf->spi_shader_col_format_blend = blend;
+       surf->spi_shader_col_format_blend_alpha = blend_alpha;
 }
 
 static void si_initialize_color_surface(struct si_context *sctx,
@@ -1989,6 +2058,12 @@ static void si_initialize_color_surface(struct si_context *sctx,
                blend_bypass = 1;
        }
 
+       if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
+           (format == V_028C70_COLOR_8 ||
+            format == V_028C70_COLOR_8_8 ||
+            format == V_028C70_COLOR_8_8_8_8))
+               surf->color_is_int8 = true;
+
        color_info = S_028C70_FORMAT(format) |
                S_028C70_COMP_SWAP(swap) |
                S_028C70_BLEND_CLAMP(blend_clamp) |
@@ -2068,7 +2143,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
        }
 
        /* Determine pixel shader export format */
-       si_choose_spi_color_formats(surf, format, swap, ntype);
+       si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
 
        if (sctx->b.family == CHIP_STONEY &&
            !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
@@ -2296,6 +2371,11 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        util_copy_framebuffer_state(&sctx->framebuffer.state, state);
 
        sctx->framebuffer.spi_shader_col_format = 0;
+       sctx->framebuffer.spi_shader_col_format_alpha = 0;
+       sctx->framebuffer.spi_shader_col_format_blend = 0;
+       sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
+       sctx->framebuffer.color_is_int8 = 0;
+
        sctx->framebuffer.compressed_cb_mask = 0;
        sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
        sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
@@ -2318,6 +2398,15 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
                sctx->framebuffer.spi_shader_col_format |=
                        surf->spi_shader_col_format << (i * 4);
+               sctx->framebuffer.spi_shader_col_format_alpha |=
+                       surf->spi_shader_col_format_alpha << (i * 4);
+               sctx->framebuffer.spi_shader_col_format_blend |=
+                       surf->spi_shader_col_format_blend << (i * 4);
+               sctx->framebuffer.spi_shader_col_format_blend_alpha |=
+                       surf->spi_shader_col_format_blend_alpha << (i * 4);
+
+               if (surf->color_is_int8)
+                       sctx->framebuffer.color_is_int8 |= 1 << i;
 
                if (rtex->fmask.size && rtex->cmask.size) {
                        sctx->framebuffer.compressed_cb_mask |= 1 << i;
@@ -2328,6 +2417,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        if (i == 1 && surf) {
                sctx->framebuffer.spi_shader_col_format |=
                        surf->spi_shader_col_format << (i * 4);
+               sctx->framebuffer.spi_shader_col_format_alpha |=
+                       surf->spi_shader_col_format_alpha << (i * 4);
+               sctx->framebuffer.spi_shader_col_format_blend |=
+                       surf->spi_shader_col_format_blend << (i * 4);
+               sctx->framebuffer.spi_shader_col_format_blend_alpha |=
+                       surf->spi_shader_col_format_blend_alpha << (i * 4);
        }
 
        if (state->zsbuf) {
index 46ba3c4301fd1cb88202286709fd94fa95360978..be3488e6dba1f21477c243bf969c2847950b205c 100644 (file)
@@ -42,6 +42,11 @@ struct si_state_blend {
        bool                    alpha_to_coverage;
        bool                    alpha_to_one;
        bool                    dual_src_blend;
+       /* Set 0xf or 0x0 (4 bits) per render target if the following is
+        * true. ANDed with spi_shader_col_format.
+        */
+       unsigned                blend_enable_4bit;
+       unsigned                need_src_alpha_4bit;
 };
 
 struct si_state_rasterizer {
index e08722d0d28e89b41890f1cdd0eb14c6d5ba3e12..59aee54c3b16bd0f8ca4a2dadcb4e6430eb5791b 100644 (file)
@@ -617,7 +617,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                    sel->info.colors_written == 0x1)
                        key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
 
-               key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
+               if (blend) {
+                       /* Select the shader color format based on whether
+                        * blending or alpha are needed.
+                        */
+                       key->ps.spi_shader_col_format =
+                               (blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+                                sctx->framebuffer.spi_shader_col_format_blend_alpha) |
+                               (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+                                sctx->framebuffer.spi_shader_col_format_blend) |
+                               (~blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+                                sctx->framebuffer.spi_shader_col_format_alpha) |
+                               (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+                                sctx->framebuffer.spi_shader_col_format);
+               } else
+                       key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
 
                /* If alpha-to-coverage is enabled, we have to export alpha
                 * even if there is no color buffer.
@@ -626,6 +640,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                    blend && blend->alpha_to_coverage)
                        key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
 
+               /* On SI and CIK except Hawaii, the CB doesn't clamp outputs
+                * to the range supported by the type if a channel has less
+                * than 16 bits and the export format is 16_ABGR.
+                */
+               if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
+                       key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;
+
                if (rs) {
                        bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
                                        sctx->current_rast_prim <= PIPE_PRIM_POLYGON) ||