From a4b36cd3dd307d75913dbc183cdf2d0c1e97ea0e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 18 Apr 2020 09:30:05 -0500 Subject: [PATCH] intel/fs: Coalesce when the src live range is contained in the dst MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Consider the following case: // g119-123 are written somewhere above mul.sat(16) g67<1>F g6.4<0,1,0>F g125<8,8,1>F mul.sat(16) g69<1>F g6.5<0,1,0>F g125<8,8,1>F mul.sat(16) g71<1>F g6.6<0,1,0>F g125<8,8,1>F mov(16) g119<1>F g67<8,8,1>F mov(16) g121<1>F g69<8,8,1>F mov(16) g123<1>F g71<8,8,1>F We should be able to coalesce it into mul.sat(16) g119<1>F g6.4<0,1,0>F g125<8,8,1>F mul.sat(16) g121<1>F g6.5<0,1,0>F g125<8,8,1>F mul.sat(16) g123<1>F g6.6<0,1,0>F g125<8,8,1>F What's stopping us is an overly conservative check for writes to the two registers being coalesced. The check walks over the intersection of their live ranges and checks for no writes to either one. However, because the register which starts the live range (the mul.sat in this case) is inside that intersection, we flag it as a write in the intersection and don't coalesce. However, this case is safe because the destination register of the copy is never read after the source is written. Shader-db changes on ICL: total instructions in shared programs: 16043613 -> 16042610 (<.01%) instructions in affected programs: 43036 -> 42033 (-2.33%) helped: 226 HURT: 0 helped stats (abs) min: 1 max: 30 x̄: 4.44 x̃: 4 helped stats (rel) min: 0.09% max: 26.67% x̄: 4.89% x̃: 3.43% 95% mean confidence interval for instructions value: -4.86 -4.02 95% mean confidence interval for instructions %-change: -5.57% -4.22% Instructions are helped. total cycles in shared programs: 334766372 -> 334710124 (-0.02%) cycles in affected programs: 617548 -> 561300 (-9.11%) helped: 214 HURT: 2 helped stats (abs) min: 15 max: 1512 x̄: 263.21 x̃: 212 helped stats (rel) min: 0.30% max: 75.36% x̄: 25.30% x̃: 21.58% HURT stats (abs) min: 40 max: 40 x̄: 40.00 x̃: 40 HURT stats (rel) min: 0.15% max: 0.15% x̄: 0.15% x̃: 0.15% 95% mean confidence interval for cycles value: -277.91 -242.90 95% mean confidence interval for cycles %-change: -27.58% -22.55% Cycles are helped. No spill/fill changes or gained/lost Reviewed-by: Matt Turner Reviewed-by: Kenneth Graunke Part-of: --- .../compiler/brw_fs_register_coalesce.cpp | 50 ++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp index e1964863519..5d24240c393 100644 --- a/src/intel/compiler/brw_fs_register_coalesce.cpp +++ b/src/intel/compiler/brw_fs_register_coalesce.cpp @@ -97,8 +97,8 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst) } static bool -can_coalesce_vars(const fs_live_variables &live, - const cfg_t *cfg, const fs_inst *inst, +can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg, + const bblock_t *block, const fs_inst *inst, int dst_var, int src_var) { if (!live.vars_interfere(src_var, dst_var)) @@ -126,6 +126,8 @@ can_coalesce_vars(const fs_live_variables &live, int scan_ip = scan_block->start_ip - 1; + bool seen_src_write = false; + bool seen_copy = false; foreach_inst_in_block(fs_inst, scan_inst, scan_block) { scan_ip++; @@ -134,17 +136,51 @@ can_coalesce_vars(const fs_live_variables &live, continue; /* Ignore the copying instruction itself */ - if (scan_inst == inst) + if (scan_inst == inst) { + seen_copy = true; continue; + } if (scan_ip > end_ip) return true; /* registers do not interfere */ + if (seen_src_write && !seen_copy) { + /* In order to satisfy the guarantee of register coalescing, we + * must ensure that the two registers always have the same value + * during the intersection of their live ranges. One way to do + * this is to simply ensure that neither is ever written apart + * from the one copy which syncs up the two registers. However, + * this can be overly conservative and only works in the case + * where the destination live range is entirely contained in the + * source live range. + * + * To handle the other case where the source is contained in the + * destination, we allow writes to the source register as long as + * they happen before the copy, in the same block as the copy, and + * the destination is never read between first such write and the + * copy. This effectively moves the write from the copy up. + */ + for (int j = 0; j < scan_inst->sources; j++) { + if (regions_overlap(scan_inst->src[j], scan_inst->size_read(j), + inst->dst, inst->size_written)) + return false; /* registers interfere */ + } + } + + /* The MOV being coalesced had better be the only instruction which + * writes to the coalesce destination in the intersection. + */ if (regions_overlap(scan_inst->dst, scan_inst->size_written, - inst->dst, inst->size_written) || - regions_overlap(scan_inst->dst, scan_inst->size_written, - inst->src[0], inst->size_read(0))) + inst->dst, inst->size_written)) return false; /* registers interfere */ + + /* See the big comment above */ + if (regions_overlap(scan_inst->dst, scan_inst->size_written, + inst->src[0], inst->size_read(0))) { + if (seen_copy || scan_block != block) + return false; + seen_src_write = true; + } } } @@ -228,7 +264,7 @@ fs_visitor::register_coalesce() dst_var[i] = live.var_from_vgrf[dst_reg] + dst_reg_offset[i]; src_var[i] = live.var_from_vgrf[src_reg] + i; - if (!can_coalesce_vars(live, cfg, inst, dst_var[i], src_var[i])) { + if (!can_coalesce_vars(live, cfg, block, inst, dst_var[i], src_var[i])) { can_coalesce = false; src_reg = ~0u; break; -- 2.30.2