From a5a2cc530c1f493557c232557ad1910e607712c2 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 8 Sep 2016 01:42:06 +0200
Subject: [PATCH] radeonsi: fix the VGT performance tweak for small instances
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Based on the VGT spec. The Vulkan driver doesn't do it optimally and they plan
to fix it.

Reviewed-by: Nicolai Hähnle
---
 src/gallium/drivers/radeonsi/si_state_draw.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index d3e6e1ac937..e44147f43b7 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -318,14 +318,15 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 			wd_switch_on_eop = true;
 
 		/* Performance recommendation for 4 SE Gfx7-8 parts if
-		 * instances are smaller than a primgroup. Ignore the fact
-		 * primgroup_size is a primitive count, not vertex count.
-		 * Don't do anything for indirect draws.
+		 * instances are smaller than a primgroup.
+		 * Assume indirect draws always use small instances.
+		 * This is needed for good VS wave utilization.
 		 */
 		if (sctx->b.chip_class <= VI &&
 		    sctx->b.screen->info.max_se >= 4 &&
-		    !info->indirect &&
-		    info->instance_count > 1 && info->count < primgroup_size)
+		    (info->indirect ||
+		     (info->instance_count > 1 &&
+		      si_num_prims_for_vertices(info) < primgroup_size)))
 			wd_switch_on_eop = true;
 
 		/* Required on CIK and later. */
-- 
2.30.2