radeonsi: Allow TES distribution between shader engines.
author: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 12 Apr 2016 18:28:46 +0000 (20:28 +0200)
committer: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Thu, 26 May 2016 20:07:04 +0000 (22:07 +0200)
The R_028B50_VGT_TESS_DISTRIBUTION value is copied from
amdgpu-pro. Smaller values in the ACCUM fields seem to
decrease the performance advantage from this patch; higher
values don't seem to matter.

v2: Add distribution mode field enums.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_shaders.c
src/gallium/drivers/radeonsi/sid.h

index aefa336c50fd6d49b044e6061a5bd590fafd8a17..ab321ef0b8fe869bff1c9d76432aa9fe0a04be9f 100644 (file)
@@ -3791,6 +3791,11 @@ static void si_init_config(struct si_context *sctx)
                               S_028424_OVERWRITE_COMBINER_WATERMARK(4));
                si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
                si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
+               si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
+                              S_028B50_ACCUM_ISOLINE(32) |
+                              S_028B50_ACCUM_TRI(11) |
+                              S_028B50_ACCUM_QUAD(11) |
+                              S_028B50_DONUT_SPLIT(16));
        }
 
        if (sctx->b.family == CHIP_STONEY)
index c8b87a9f1a0fcd78811cd5f92480966c0d26ac6d..788869e5a918d67aae8fd6526f71e6f6c2484612 100644 (file)
@@ -279,6 +279,14 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
                     sctx->b.family == CHIP_BONAIRE) &&
                    sctx->gs_shader.cso)
                        partial_vs_wave = true;
+
+               /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
+               if (sctx->b.chip_class >= VI) {
+                       if (sctx->gs_shader.cso)
+                               partial_es_wave = true;
+                       else
+                               partial_vs_wave = true;
+               }
        }
 
        /* This is a hardware requirement. */
index 116bf27b4716abd1f9e2c4716472b153803b72f7..c6f51ea84d52d153e542bb1fc2342b096c865b63 100644 (file)
@@ -249,7 +249,8 @@ void si_destroy_shader_cache(struct si_screen *sscreen)
 
 /* SHADER STATES */
 
-static void si_set_tesseval_regs(struct si_shader *shader,
+static void si_set_tesseval_regs(struct si_screen *sscreen,
+                                struct si_shader *shader,
                                 struct si_pm4_state *pm4)
 {
        struct tgsi_shader_info *info = &shader->selector->info;
@@ -257,7 +258,7 @@ static void si_set_tesseval_regs(struct si_shader *shader,
        unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
        bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
        bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
-       unsigned type, partitioning, topology;
+       unsigned type, partitioning, topology, distribution_mode;
 
        switch (tes_prim_mode) {
        case PIPE_PRIM_LINES:
@@ -299,10 +300,16 @@ static void si_set_tesseval_regs(struct si_shader *shader,
        else
                topology = V_028B6C_OUTPUT_TRIANGLE_CW;
 
+       if (sscreen->b.chip_class >= VI)
+               distribution_mode = V_028B6C_DISTRIBUTION_MODE_DONUTS;
+       else
+               distribution_mode = V_028B6C_DISTRIBUTION_MODE_NO_DIST;
+
        si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,
                       S_028B6C_TYPE(type) |
                       S_028B6C_PARTITIONING(partitioning) |
-                      S_028B6C_TOPOLOGY(topology));
+                      S_028B6C_TOPOLOGY(topology) |
+                      S_028B6C_DISTRIBUTION_MODE(distribution_mode));
 }
 
 static void si_shader_ls(struct si_shader *shader)
@@ -359,7 +366,7 @@ static void si_shader_hs(struct si_shader *shader)
                       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 }
 
-static void si_shader_es(struct si_shader *shader)
+static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
        unsigned num_user_sgprs;
@@ -402,7 +409,7 @@ static void si_shader_es(struct si_shader *shader)
                       S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 
        if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
-               si_set_tesseval_regs(shader, pm4);
+               si_set_tesseval_regs(sscreen, shader, pm4);
 }
 
 /**
@@ -489,7 +496,8 @@ static void si_shader_gs(struct si_shader *shader)
  * If \p gs is non-NULL, it points to the geometry shader for which this shader
  * is the copy shader.
  */
-static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
+static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
+                         struct si_shader *gs)
 {
        struct si_pm4_state *pm4;
        unsigned num_user_sgprs;
@@ -583,7 +591,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
                               S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
 
        if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
-               si_set_tesseval_regs(shader, pm4);
+               si_set_tesseval_regs(sscreen, shader, pm4);
 }
 
 static unsigned si_get_ps_num_interp(struct si_shader *ps)
@@ -769,7 +777,8 @@ static void si_shader_ps(struct si_shader *shader)
                shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
 }
 
-static void si_shader_init_pm4_state(struct si_shader *shader)
+static void si_shader_init_pm4_state(struct si_screen *sscreen,
+                                     struct si_shader *shader)
 {
 
        if (shader->pm4)
@@ -780,22 +789,22 @@ static void si_shader_init_pm4_state(struct si_shader *shader)
                if (shader->key.vs.as_ls)
                        si_shader_ls(shader);
                else if (shader->key.vs.as_es)
-                       si_shader_es(shader);
+                       si_shader_es(sscreen, shader);
                else
-                       si_shader_vs(shader, NULL);
+                       si_shader_vs(sscreen, shader, NULL);
                break;
        case PIPE_SHADER_TESS_CTRL:
                si_shader_hs(shader);
                break;
        case PIPE_SHADER_TESS_EVAL:
                if (shader->key.tes.as_es)
-                       si_shader_es(shader);
+                       si_shader_es(sscreen, shader);
                else
-                       si_shader_vs(shader, NULL);
+                       si_shader_vs(sscreen, shader, NULL);
                break;
        case PIPE_SHADER_GEOMETRY:
                si_shader_gs(shader);
-               si_shader_vs(shader->gs_copy_shader, shader);
+               si_shader_vs(sscreen, shader->gs_copy_shader, shader);
                break;
        case PIPE_SHADER_FRAGMENT:
                si_shader_ps(shader);
@@ -989,7 +998,7 @@ static int si_shader_select_with_key(struct pipe_context *ctx,
                pipe_mutex_unlock(sel->mutex);
                return r;
        }
-       si_shader_init_pm4_state(shader);
+       si_shader_init_pm4_state(sctx->screen, shader);
 
        if (!sel->last_variant) {
                sel->first_variant = shader;
@@ -1664,7 +1673,7 @@ static int si_update_scratch_buffer(struct si_context *sctx,
                return r;
 
        /* Update the shader state to use the new shader bo. */
-       si_shader_init_pm4_state(shader);
+       si_shader_init_pm4_state(sctx->screen, shader);
 
        r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
 
index dcd63727058f4fb527c08e44e35d33fdda02ae1a..25f8cf5cee4bd9c265f40b498268a988e13ed429 100644 (file)
 #define   S_028B6C_DISTRIBUTION_MODE(x)                               (((unsigned)(x) & 0x03) << 17)
 #define   G_028B6C_DISTRIBUTION_MODE(x)                               (((x) >> 17) & 0x03)
 #define   C_028B6C_DISTRIBUTION_MODE                                  0xFFF9FFFF
+#define     V_028B6C_DISTRIBUTION_MODE_NO_DIST                      0x00
+#define     V_028B6C_DISTRIBUTION_MODE_PATCHES                      0x01
+#define     V_028B6C_DISTRIBUTION_MODE_DONUTS                       0x02
 #define   S_028B6C_MTYPE(x)                                           (((unsigned)(x) & 0x03) << 19)
 #define   G_028B6C_MTYPE(x)                                           (((x) >> 19) & 0x03)
 #define   C_028B6C_MTYPE                                              0xFFE7FFFF