radeonsi: compute only one set of interpolation (i,j) when MSAA is disabled
author: Marek Olšák <marek.olsak@amd.com>
Thu, 30 Jun 2016 08:57:34 +0000 (10:57 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Mon, 4 Jul 2016 22:47:12 +0000 (00:47 +0200)
This should increase the PS launch rate for shaders that use at least 2 pairs
of perspective (i,j), and likewise for shaders that use at least 2 pairs of
linear (i,j).

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index da4a6cb19e5f4b91d4d5b71cec6237f2be520f82..a59c28e75bf5460894372fea70d1fc405f202b1a 100644 (file)
@@ -1300,6 +1300,20 @@ static unsigned select_interp_param(struct si_shader_context *ctx,
                        return SI_PARAM_LINEAR_SAMPLE;
                }
        }
+       if (ctx->shader->key.ps.prolog.force_persp_center_interp) {
+               switch (param) {
+               case SI_PARAM_PERSP_CENTROID:
+               case SI_PARAM_PERSP_SAMPLE:
+                       return SI_PARAM_PERSP_CENTER;
+               }
+       }
+       if (ctx->shader->key.ps.prolog.force_linear_center_interp) {
+               switch (param) {
+               case SI_PARAM_LINEAR_CENTROID:
+               case SI_PARAM_LINEAR_SAMPLE:
+                       return SI_PARAM_LINEAR_CENTER; /* stay linear; not PERSP_CENTER */
+               }
+       }
 
        return param;
 }
@@ -6382,6 +6396,8 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
                fprintf(f, "  prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple);
                fprintf(f, "  prolog.force_persp_sample_interp = %u\n", key->ps.prolog.force_persp_sample_interp);
                fprintf(f, "  prolog.force_linear_sample_interp = %u\n", key->ps.prolog.force_linear_sample_interp);
+               fprintf(f, "  prolog.force_persp_center_interp = %u\n", key->ps.prolog.force_persp_center_interp);
+               fprintf(f, "  prolog.force_linear_center_interp = %u\n", key->ps.prolog.force_linear_center_interp);
                fprintf(f, "  epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format);
                fprintf(f, "  epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8);
                fprintf(f, "  epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf);
@@ -7255,6 +7271,40 @@ static bool si_compile_ps_prolog(struct si_screen *sscreen,
                                                   linear_sample[i], base + 10 + i, "");
        }
 
+       /* Force center interpolation. */
+       if (key->ps_prolog.states.force_persp_center_interp) {
+               unsigned i, base = key->ps_prolog.num_input_sgprs;
+               LLVMValueRef persp_center[2];
+
+               /* Read PERSP_CENTER. */
+               for (i = 0; i < 2; i++)
+                       persp_center[i] = LLVMGetParam(func, base + 2 + i);
+               /* Overwrite PERSP_SAMPLE. */
+               for (i = 0; i < 2; i++)
+                       ret = LLVMBuildInsertValue(gallivm->builder, ret,
+                                                  persp_center[i], base + i, "");
+               /* Overwrite PERSP_CENTROID. */
+               for (i = 0; i < 2; i++)
+                       ret = LLVMBuildInsertValue(gallivm->builder, ret,
+                                                  persp_center[i], base + 4 + i, "");
+       }
+       if (key->ps_prolog.states.force_linear_center_interp) {
+               unsigned i, base = key->ps_prolog.num_input_sgprs;
+               LLVMValueRef linear_center[2];
+
+               /* Read LINEAR_CENTER. */
+               for (i = 0; i < 2; i++)
+                       linear_center[i] = LLVMGetParam(func, base + 8 + i);
+               /* Overwrite LINEAR_SAMPLE. */
+               for (i = 0; i < 2; i++)
+                       ret = LLVMBuildInsertValue(gallivm->builder, ret,
+                                                  linear_center[i], base + 6 + i, "");
+               /* Overwrite LINEAR_CENTROID. */
+               for (i = 0; i < 2; i++)
+                       ret = LLVMBuildInsertValue(gallivm->builder, ret,
+                                                  linear_center[i], base + 10 + i, "");
+       }
+
        /* Tell LLVM to insert WQM instruction sequence when needed. */
        if (key->ps_prolog.wqm) {
                LLVMAddTargetDependentFunctionAttr(func,
@@ -7414,7 +7464,9 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
        prolog_key.ps_prolog.wqm = info->uses_derivatives &&
                (prolog_key.ps_prolog.colors_read ||
                 prolog_key.ps_prolog.states.force_persp_sample_interp ||
-                prolog_key.ps_prolog.states.force_linear_sample_interp);
+                prolog_key.ps_prolog.states.force_linear_sample_interp ||
+                prolog_key.ps_prolog.states.force_persp_center_interp ||
+                prolog_key.ps_prolog.states.force_linear_center_interp);
 
        if (info->colors_read) {
                unsigned *color = shader->selector->color_attr_index;
@@ -7443,6 +7495,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
                                /* Force the interpolation location for colors here. */
                                if (shader->key.ps.prolog.force_persp_sample_interp)
                                        location = TGSI_INTERPOLATE_LOC_SAMPLE;
+                               if (shader->key.ps.prolog.force_persp_center_interp)
+                                       location = TGSI_INTERPOLATE_LOC_CENTER;
 
                                switch (location) {
                                case TGSI_INTERPOLATE_LOC_SAMPLE:
@@ -7468,6 +7522,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
                                /* Force the interpolation location for colors here. */
                                if (shader->key.ps.prolog.force_linear_sample_interp)
                                        location = TGSI_INTERPOLATE_LOC_SAMPLE;
+                               if (shader->key.ps.prolog.force_linear_center_interp)
+                                       location = TGSI_INTERPOLATE_LOC_CENTER;
 
                                switch (location) {
                                case TGSI_INTERPOLATE_LOC_SAMPLE:
@@ -7499,6 +7555,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
        if (prolog_key.ps_prolog.colors_read ||
            prolog_key.ps_prolog.states.force_persp_sample_interp ||
            prolog_key.ps_prolog.states.force_linear_sample_interp ||
+           prolog_key.ps_prolog.states.force_persp_center_interp ||
+           prolog_key.ps_prolog.states.force_linear_center_interp ||
            prolog_key.ps_prolog.states.poly_stipple) {
                shader->prolog =
                        si_get_shader_part(sscreen, &sscreen->ps_prologs,
@@ -7544,6 +7602,20 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
                shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
                shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
        }
+       if (shader->key.ps.prolog.force_persp_center_interp &&
+           (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
+            G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
+               shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA;
+               shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
+               shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
+       }
+       if (shader->key.ps.prolog.force_linear_center_interp &&
+           (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
+            G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
+               shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA;
+               shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
+               shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
+       }
 
        /* POW_W_FLOAT requires that one of the perspective weights is enabled. */
        if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
index 6c2e83267ef4bb90070b523e287a5c8b35d8a739..064773605fb1c09bd61d6e168e3510a961b23501 100644 (file)
@@ -315,9 +315,9 @@ struct si_ps_prolog_bits {
        unsigned        poly_stipple:1;
        unsigned        force_persp_sample_interp:1;
        unsigned        force_linear_sample_interp:1;
+       unsigned        force_persp_center_interp:1;
+       unsigned        force_linear_center_interp:1;
        /* TODO:
-        * - add force_center_interp if MSAA is disabled and centroid or
-        *   sample are present
         * - add force_center_interp_bc_optimize to force center interpolation
         *   based on the bc_optimize SGPR bit if MSAA is enabled, centroid is
         *   present and sample isn't present.
index cf5c1f94a5a52723ad568db93b3dcd92880cea62..d679825914d275208a6bdc6ee78299e9c36374ec 100644 (file)
@@ -946,6 +946,19 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                                key->ps.prolog.force_linear_sample_interp =
                                        sel->info.uses_linear_center ||
                                        sel->info.uses_linear_centroid;
+                       } else if (!rs->multisample_enable ||
+                                  sctx->framebuffer.nr_samples <= 1) {
+                               /* Make sure SPI doesn't compute more than 1 pair
+                                * of (i,j), which is the optimization here. */
+                               key->ps.prolog.force_persp_center_interp =
+                                       sel->info.uses_persp_center +
+                                       sel->info.uses_persp_centroid +
+                                       sel->info.uses_persp_sample > 1;
+
+                               key->ps.prolog.force_linear_center_interp =
+                                       sel->info.uses_linear_center +
+                                       sel->info.uses_linear_centroid +
+                                       sel->info.uses_linear_sample > 1;
                        }
                }