From 1e8adb0ee43062210ca54821a880ef08bfdba1b7 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 24 Jun 2016 02:22:42 +0200 Subject: [PATCH] radeonsi: fix a compute shader hang with big threadgroups on SI & CI MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit ported from Vulkan Cc: 12.0 Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_compute.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 2f7e1721c89..f19e830c580 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -443,6 +443,21 @@ static void si_launch_grid( struct si_context *sctx = (struct si_context*)ctx; struct si_compute *program = sctx->cs_shader_state.program; int i; + /* HW bug workaround when CS threadgroups > 256 threads and async + * compute isn't used, i.e. only one compute job can run at a time. + * If async compute is possible, the threadgroup size must be limited + * to 256 threads on all queues to avoid the bug. + * Only SI and certain CIK chips are affected. + */ + bool cs_regalloc_hang = + (sctx->b.chip_class == SI || + sctx->b.family == CHIP_BONAIRE || + sctx->b.family == CHIP_KABINI) && + info->block[0] * info->block[1] * info->block[2] > 256; + + if (cs_regalloc_hang) + sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | + SI_CONTEXT_CS_PARTIAL_FLUSH; si_decompress_compute_textures(sctx); @@ -493,6 +508,9 @@ static void si_launch_grid( sctx->b.num_compute_calls++; if (sctx->cs_shader_state.uses_scratch) sctx->b.num_spill_compute_calls++; + + if (cs_regalloc_hang) + sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; } -- 2.30.2