radeonsi: use trapezoid distribution for tess on Fiji and Polaris
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Fri, 17 Jun 2016 18:08:42 +0000 (20:08 +0200)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Mon, 20 Jun 2016 16:29:55 +0000 (18:29 +0200)
This yields a small performance improvement in Unigine Heaven.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index 6be2f4be1f6ad24473bd6e7c0c1bb855e90ca5ae..1cef1dc875039cd9caa0a98fb1ff1939412c83e7 100644 (file)
@@ -3839,16 +3839,28 @@ static void si_init_config(struct si_context *sctx)
        }
 
        if (sctx->b.chip_class >= VI) {
+               unsigned vgt_tess_distribution;
+
                si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
                               S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
                               S_028424_OVERWRITE_COMBINER_WATERMARK(4));
                si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
                si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
-               si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
-                              S_028B50_ACCUM_ISOLINE(32) |
-                              S_028B50_ACCUM_TRI(11) |
-                              S_028B50_ACCUM_QUAD(11) |
-                              S_028B50_DONUT_SPLIT(16));
+
+               vgt_tess_distribution =
+                       S_028B50_ACCUM_ISOLINE(32) |
+                       S_028B50_ACCUM_TRI(11) |
+                       S_028B50_ACCUM_QUAD(11) |
+                       S_028B50_DONUT_SPLIT(16);
+
+               /* Testing with Unigine Heaven extreme tessellation yielded best results
+                * with TRAP_SPLIT = 3.
+                */
+               if (sctx->b.family == CHIP_FIJI ||
+                   sctx->b.family >= CHIP_POLARIS10)
+                       vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
+
+               si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
        }
 
        if (sctx->b.family == CHIP_STONEY)
index a7af76d81d48abc6da4f3a86f1c185a826197706..daef49d58b660f215b1bd39e236ffe2821231f9d 100644 (file)
@@ -300,9 +300,13 @@ static void si_set_tesseval_regs(struct si_screen *sscreen,
        else
                topology = V_028B6C_OUTPUT_TRIANGLE_CW;
 
-       if (sscreen->b.chip_class >= VI)
-               distribution_mode = V_028B6C_DISTRIBUTION_MODE_DONUTS;
-       else
+       if (sscreen->b.chip_class >= VI) {
+               if (sscreen->b.family == CHIP_FIJI ||
+                   sscreen->b.family >= CHIP_POLARIS10)
+                       distribution_mode = V_028B6C_DISTRIBUTION_MODE_TRAPEZOIDS;
+               else
+                       distribution_mode = V_028B6C_DISTRIBUTION_MODE_DONUTS;
+       } else
                distribution_mode = V_028B6C_DISTRIBUTION_MODE_NO_DIST;
 
        si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,