radeonsi: add a performance tweak for 4 SE parts
author: Marek Olšák <marek.olsak@amd.com>
Fri, 3 Jun 2016 14:20:17 +0000 (16:20 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Mon, 6 Jun 2016 20:50:55 +0000 (22:50 +0200)
Ported from Vulkan.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_state_draw.c

index aed509de9bbfae6088ad8d3a35880ddcab72f18b..0a85d5a0ca387187e532301aba423693021f0490 100644 (file)
@@ -308,6 +308,17 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
                    (info->indirect || info->instance_count > 1))
                        wd_switch_on_eop = true;
 
+               /* Performance recommendation for 4 SE Gfx7-8 parts if
+                * instances are smaller than a primgroup. Ignore the fact
+                * primgroup_size is a primitive count, not vertex count.
+                * Don't do anything for indirect draws.
+                */
+               if (sctx->b.chip_class <= VI &&
+                   sctx->b.screen->info.max_se >= 4 &&
+                   !info->indirect &&
+                   info->instance_count > 1 && info->count < primgroup_size)
+                       wd_switch_on_eop = true;
+
                /* Required on CIK and later. */
                if (sctx->b.screen->info.max_se > 2 && !wd_switch_on_eop)
                        ia_switch_on_eoi = true;