}
static bool
-can_coalesce_vars(const fs_live_variables &live,
- const cfg_t *cfg, const fs_inst *inst,
+can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg,
+ const bblock_t *block, const fs_inst *inst,
int dst_var, int src_var)
{
if (!live.vars_interfere(src_var, dst_var))
int scan_ip = scan_block->start_ip - 1;
+ bool seen_src_write = false;
+ bool seen_copy = false;
foreach_inst_in_block(fs_inst, scan_inst, scan_block) {
scan_ip++;
continue;
/* Ignore the copying instruction itself */
- if (scan_inst == inst)
+ if (scan_inst == inst) {
+ seen_copy = true;
continue;
+ }
if (scan_ip > end_ip)
return true; /* registers do not interfere */
+ if (seen_src_write && !seen_copy) {
+ /* In order to satisfy the guarantee of register coalescing, we
+ * must ensure that the two registers always have the same value
+ * during the intersection of their live ranges. One way to do
+ * this is to simply ensure that neither is ever written apart
+ * from the one copy which syncs up the two registers. However,
+ * this can be overly conservative and only works in the case
+ * where the destination live range is entirely contained in the
+ * source live range.
+ *
+ * To handle the other case where the source is contained in the
+ * destination, we allow writes to the source register as long as
+ * they happen before the copy, in the same block as the copy, and
+ * the destination is never read between the first such write and the
+ * copy. This effectively moves the write from the copy up.
+ */
+ for (int j = 0; j < scan_inst->sources; j++) {
+ if (regions_overlap(scan_inst->src[j], scan_inst->size_read(j),
+ inst->dst, inst->size_written))
+ return false; /* registers interfere */
+ }
+ }
+
+ /* The MOV being coalesced had better be the only instruction which
+ * writes to the coalesce destination in the intersection.
+ */
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
- inst->dst, inst->size_written) ||
- regions_overlap(scan_inst->dst, scan_inst->size_written,
- inst->src[0], inst->size_read(0)))
+ inst->dst, inst->size_written))
return false; /* registers interfere */
+
+ /* See the big comment above */
+ if (regions_overlap(scan_inst->dst, scan_inst->size_written,
+ inst->src[0], inst->size_read(0))) {
+ if (seen_copy || scan_block != block)
+ return false;
+ seen_src_write = true;
+ }
}
}
dst_var[i] = live.var_from_vgrf[dst_reg] + dst_reg_offset[i];
src_var[i] = live.var_from_vgrf[src_reg] + i;
- if (!can_coalesce_vars(live, cfg, inst, dst_var[i], src_var[i])) {
+ if (!can_coalesce_vars(live, cfg, block, inst, dst_var[i], src_var[i])) {
can_coalesce = false;
src_reg = ~0u;
break;