From c6e0c062daa55269661b190deaec40e9749198bc Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 27 Feb 2020 19:56:22 +0000 Subject: [PATCH] aco: improve control flow handling in GFX6-9 NOP pass MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fixes Detroit: Become Human hang. Also affects World of Warships. pipeline-db (Tahiti): Totals from affected shaders: SGPRS: 0 -> 0 (0.00 %) VGPRS: 0 -> 0 (0.00 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 0 -> 0 (0.00 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 0 -> 0 (0.00 %) pipeline-db (Polaris): Totals from affected shaders: SGPRS: 17168 -> 17168 (0.00 %) VGPRS: 11296 -> 11296 (0.00 %) Spilled SGPRs: 1870 -> 1870 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 1472628 -> 1473292 (0.05 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 628 -> 628 (0.00 %) pipeline-db (Vega): Totals from affected shaders: SGPRS: 17168 -> 17168 (0.00 %) VGPRS: 11296 -> 11296 (0.00 %) Spilled SGPRs: 1870 -> 1870 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 1409716 -> 1410380 (0.05 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 0 -> 0 (0.00 %) Max Waves is lower than it should be because of a null winsys bug. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Tested-by: Marge Bot Part-of: --- src/amd/compiler/aco_insert_NOPs.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 4302711ba81..75dbe852174 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -225,7 +225,16 @@ int handle_raw_hazard_internal(Program *program, Block *block, return 0; } - return 0; + int res = 0; + + /* Loops require branch instructions, which count towards the wait + * states. So even with loops this should finish unless nops_needed is some + * huge value. */ + for (unsigned lin_pred : block->linear_preds) { + res = std::max(res, handle_raw_hazard_internal( + program, &program->blocks[lin_pred], nops_needed, reg, mask)); + } + return res; } template @@ -763,14 +772,10 @@ void mitigate_hazards(Program *program) void insert_NOPs(Program* program) { - if (program->chip_class >= GFX10) { + if (program->chip_class >= GFX10) mitigate_hazards(program); - } else { - for (Block& block : program->blocks) { - NOP_ctx_gfx6 ctx; - handle_block(program, ctx, block); - } - } + else + mitigate_hazards(program); } } -- 2.30.2