i965/fs: fix copy propagation of partially invalidated entries
authorIago Toral Quiroga <itoral@igalia.com>
Fri, 11 Mar 2016 13:35:07 +0000 (14:35 +0100)
committerSamuel Iglesias Gonsálvez <siglesias@igalia.com>
Mon, 16 May 2016 07:55:32 +0000 (09:55 +0200)
We were not invalidating entries with a src that reads more than one register
when we find writes that overwrite any register read by entry->src after
the first. This leads to incorrect copy propagation because we re-use
entries from the ACP that have been partially invalidated. Same thing for
entries with a dst that writes to more than one register.

v2 (Sam):
- Improve code by defining regions_overlap() and using it instead of a
loop (Curro).

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp

index 23c545b7a6a20bf46d8b5e1c4aa205afba0b5c6a..4b73b07a2401ae8489f8d8bf7c268dc2f3db8a2b 100644 (file)
@@ -44,6 +44,7 @@ struct acp_entry : public exec_node {
    fs_reg dst;
    fs_reg src;
    uint8_t regs_written;
+   uint8_t regs_read;
    enum opcode opcode;
    bool saturate;
 };
@@ -702,6 +703,14 @@ can_propagate_from(fs_inst *inst)
            !inst->is_partial_write());
 }
 
+inline bool
+regions_overlap(const fs_reg &r, unsigned n, const fs_reg &s, unsigned m)
+{
+   return r.file == s.file && r.nr == s.nr &&
+      !(r.reg_offset + n < s.reg_offset ||
+        s.reg_offset + m < r.reg_offset);
+}
+
 /* Walks a basic block and does copy propagation on it using the acp
  * list.
  */
@@ -728,18 +737,26 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
       /* kill the destination from the ACP */
       if (inst->dst.file == VGRF) {
          foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.nr % ACP_HASH_SIZE]) {
-           if (inst->overwrites_reg(entry->dst)) {
-              entry->remove();
-           }
-        }
+            if (regions_overlap(entry->dst, entry->regs_written,
+                                inst->dst, inst->regs_written)) {
+               entry->remove();
+               break;
+            }
+         }
 
          /* Oops, we only have the chaining hash based on the destination, not
           * the source, so walk across the entire table.
           */
          for (int i = 0; i < ACP_HASH_SIZE; i++) {
             foreach_in_list_safe(acp_entry, entry, &acp[i]) {
-               if (inst->overwrites_reg(entry->src))
+               /* Make sure we kill the entry if this instruction overwrites
+                * _any_ of the registers that it reads
+                */
+               if (regions_overlap(entry->src, entry->regs_read,
+                                   inst->dst, inst->regs_written)) {
                   entry->remove();
+                  continue;
+               }
             }
         }
       }
@@ -748,10 +765,11 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
        * operand of another instruction, add it to the ACP.
        */
       if (can_propagate_from(inst)) {
-        acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
-        entry->dst = inst->dst;
-        entry->src = inst->src[0];
+         acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
+         entry->dst = inst->dst;
+         entry->src = inst->src[0];
          entry->regs_written = inst->regs_written;
+         entry->regs_read = inst->regs_read(0);
          entry->opcode = inst->opcode;
          entry->saturate = inst->saturate;
          acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
@@ -769,6 +787,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
                entry->dst.reg_offset = offset;
                entry->src = inst->src[i];
                entry->regs_written = regs_written;
+               entry->regs_read = inst->regs_read(i);
                entry->opcode = inst->opcode;
                if (!entry->dst.equals(inst->src[i])) {
                   acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);