From 591f146fd2c7b265cd9e759c242e6d6437ea6578 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 29 Dec 2019 06:10:47 -0800 Subject: [PATCH] intel/fs/cse: Fix non-deterministic behavior due to inaccurate liveness calculation. The liveness calculation done by the local CSE pass in order to prune AEB entries whose sources are no longer live is currently inaccurate, because the live intervals are calculated once at the beginning of the pass, so they don't take into account any of the copy instructions inserted by the CSE pass as it makes progress. However the IP counter used in that calculation is based on the start_ip of the basic block, which is updated automatically whenever any instructions are inserted into the CFG. This causes the IP counter and liveness intervals to get out of sync in programs with multiple basic blocks, causing the CSE pass to toss AEB entries prematurely, which can lead to missed optimization opportunities rather non-deterministically. On BDW this leads to the following shader-db changes: total instructions in shared programs: 14952488 -> 14951763 (-0.00%) instructions in affected programs: 45416 -> 44691 (-1.60%) helped: 40 HURT: 4 total spills in shared programs: 20989 -> 20970 (-0.09%) spills in affected programs: 103 -> 84 (-18.45%) helped: 3 HURT: 0 total fills in shared programs: 24981 -> 24926 (-0.22%) fills in affected programs: 127 -> 72 (-43.31%) helped: 3 HURT: 0 In addition it avoids a number of regressions in combination with some of the optimization changes I'm working on for SIMD32, which would have made CSE more effective... Causing it to be less effective elsewhere in the program astonishingly. Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_fs.h | 2 +- src/intel/compiler/brw_fs_cse.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 680bdc535ac..5236fff4b6e 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -131,7 +131,7 @@ public: bool opt_algebraic(); bool opt_redundant_discard_jumps(); bool opt_cse(); - bool opt_cse_local(bblock_t *block); + bool opt_cse_local(bblock_t *block, int &ip); bool opt_copy_propagation(); bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry); bool try_constant_propagate(fs_inst *inst, acp_entry *entry); diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp index 6efa111b1a4..f348f915e78 100644 --- a/src/intel/compiler/brw_fs_cse.cpp +++ b/src/intel/compiler/brw_fs_cse.cpp @@ -242,14 +242,13 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) } bool -fs_visitor::opt_cse_local(bblock_t *block) +fs_visitor::opt_cse_local(bblock_t *block, int &ip) { bool progress = false; exec_list aeb; void *cse_ctx = ralloc_context(NULL); - int ip = block->start_ip; foreach_inst_in_block(fs_inst, inst, block) { /* Skip some cases. */ if (is_expression(this, inst) && !inst->is_partial_write() && @@ -370,11 +369,12 @@ bool fs_visitor::opt_cse() { bool progress = false; + int ip = 0; calculate_live_intervals(); foreach_block (block, cfg) { - progress = opt_cse_local(block) || progress; + progress = opt_cse_local(block, ip) || progress; } if (progress) -- 2.30.2