}
static bool
-can_coalesce_vars(const fs_live_variables &live,
- const cfg_t *cfg, const fs_inst *inst,
+can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg,
+ const bblock_t *block, const fs_inst *inst,
int dst_var, int src_var)
{
if (!live.vars_interfere(src_var, dst_var))
int scan_ip = scan_block->start_ip - 1;
+ bool seen_src_write = false;
+ bool seen_copy = false;
foreach_inst_in_block(fs_inst, scan_inst, scan_block) {
scan_ip++;
continue;
/* Ignore the copying instruction itself */
- if (scan_inst == inst)
+ if (scan_inst == inst) {
+ seen_copy = true;
continue;
+ }
if (scan_ip > end_ip)
return true; /* registers do not interfere */
+ if (seen_src_write && !seen_copy) {
+ /* In order to satisfy the guarantee of register coalescing, we
+ * must ensure that the two registers always have the same value
+ * during the intersection of their live ranges. One way to do
+ * this is to simply ensure that neither is ever written apart
+ * from the one copy which syncs up the two registers. However,
+ * this can be overly conservative and only works in the case
+ * where the destination live range is entirely contained in the
+ * source live range.
+ *
+ * To handle the other case where the source is contained in the
+ * destination, we allow writes to the source register as long as
+ * they happen before the copy, in the same block as the copy, and
+ * the destination is never read between the first such write and the
+ * copy. This effectively moves the write done by the copy up.
+ */
+ for (int j = 0; j < scan_inst->sources; j++) {
+ if (regions_overlap(scan_inst->src[j], scan_inst->size_read(j),
+ inst->dst, inst->size_written))
+ return false; /* registers interfere */
+ }
+ }
+
+ /* The MOV being coalesced had better be the only instruction which
+ * writes to the coalesce destination in the intersection.
+ */
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
- inst->dst, inst->size_written) ||
- regions_overlap(scan_inst->dst, scan_inst->size_written,
- inst->src[0], inst->size_read(0)))
+ inst->dst, inst->size_written))
return false; /* registers interfere */
+
+ /* See the big comment above */
+ if (regions_overlap(scan_inst->dst, scan_inst->size_written,
+ inst->src[0], inst->size_read(0))) {
+ if (seen_copy || scan_block != block)
+ return false;
+ seen_src_write = true;
+ }
}
}
dst_var[i] = live.var_from_vgrf[dst_reg] + dst_reg_offset[i];
src_var[i] = live.var_from_vgrf[src_reg] + i;
- if (!can_coalesce_vars(live, cfg, inst, dst_var[i], src_var[i])) {
+ if (!can_coalesce_vars(live, cfg, block, inst, dst_var[i], src_var[i])) {
can_coalesce = false;
src_reg = ~0u;
break;
progress = true;
for (int i = 0; i < src_size; i++) {
- if (mov[i]) {
+ if (!mov[i])
+ continue;
+
+ if (mov[i]->conditional_mod == BRW_CONDITIONAL_NONE) {
mov[i]->opcode = BRW_OPCODE_NOP;
- mov[i]->conditional_mod = BRW_CONDITIONAL_NONE;
mov[i]->dst = reg_undef;
for (int j = 0; j < mov[i]->sources; j++) {
mov[i]->src[j] = reg_undef;
}
+ } else {
+ /* If we have a conditional modifier, rewrite the MOV to be a
+ * MOV.cmod from the coalesced register. Hopefully, cmod
+ * propagation will clean this up and move it to the instruction
+ * that writes the register. If not, this keeps things correct
+ * while still letting us coalesce.
+ */
+ assert(mov[i]->opcode == BRW_OPCODE_MOV);
+ assert(mov[i]->sources == 1);
+ mov[i]->src[0] = mov[i]->dst;
+ mov[i]->dst = retype(brw_null_reg(), mov[i]->dst.type);
}
}