i965/fs_inst: Add an is_copy_payload helper
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_register_coalesce.cpp
index 4e3b611197ad823ff2f8c2c50f13f3f96458fb3a..2ad7079bdf804e7b80fba55df0379bfc573b33b5 100644 (file)
  */
 
 #include "brw_fs.h"
+#include "brw_cfg.h"
 #include "brw_fs_live_variables.h"
 
 static bool
-is_coalesce_candidate(const fs_inst *inst, const int *virtual_grf_sizes)
+is_nop_mov(const fs_inst *inst)
 {
-   if (inst->opcode != BRW_OPCODE_MOV ||
+   if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
+      fs_reg dst = inst->dst;
+      for (int i = 0; i < inst->sources; i++) {
+         dst.reg_offset = i;
+         if (!dst.equals(inst->src[i])) {
+            return false;
+         }
+      }
+      return true;
+   } else if (inst->opcode == BRW_OPCODE_MOV) {
+      return inst->dst.equals(inst->src[0]);
+   }
+
+   return false;
+}
+
+static bool
+is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
+{
+   if ((inst->opcode != BRW_OPCODE_MOV &&
+        inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) ||
        inst->is_partial_write() ||
        inst->saturate ||
        inst->src[0].file != GRF ||
@@ -58,53 +79,58 @@ is_coalesce_candidate(const fs_inst *inst, const int *virtual_grf_sizes)
       return false;
    }
 
-   if (virtual_grf_sizes[inst->src[0].reg] >
-       virtual_grf_sizes[inst->dst.reg])
+   if (v->alloc.sizes[inst->src[0].reg] >
+       v->alloc.sizes[inst->dst.reg])
       return false;
 
+   if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
+      if (!inst->is_copy_payload(v->alloc)) {
+         return false;
+      }
+   }
+
    return true;
 }
 
 static bool
 can_coalesce_vars(brw::fs_live_variables *live_intervals,
-                  const exec_list *instructions, const fs_inst *inst,
+                  const cfg_t *cfg, const fs_inst *inst,
                   int var_to, int var_from)
 {
-   if (live_intervals->vars_interfere(var_from, var_to) &&
-       !inst->dst.equals(inst->src[0])) {
-
-      /* We know that the live ranges of A (var_from) and B (var_to)
-       * interfere because of the ->vars_interfere() call above. If the end
-       * of B's live range is after the end of A's range, then we know two
-       * things:
-       *  - the start of B's live range must be in A's live range (since we
-       *    already know the two ranges interfere, this is the only remaining
-       *    possibility)
-       *  - the interference isn't of the form we're looking for (where B is
-       *    entirely inside A)
-       */
-      if (live_intervals->end[var_to] > live_intervals->end[var_from])
-         return false;
+   if (!live_intervals->vars_interfere(var_from, var_to))
+      return true;
 
-      int scan_ip = -1;
+   int start_to = live_intervals->start[var_to];
+   int end_to = live_intervals->end[var_to];
+   int start_from = live_intervals->start[var_from];
+   int end_from = live_intervals->end[var_from];
 
-      foreach_list(n, instructions) {
-         fs_inst *scan_inst = (fs_inst *)n;
-         scan_ip++;
+   /* Variables interfere and one line range isn't a subset of the other. */
+   if ((end_to > end_from && start_from < start_to) ||
+       (end_from > end_to && start_to < start_from))
+      return false;
 
-         if (scan_inst->is_control_flow())
-            return false;
+   int start_ip = MIN2(start_to, start_from);
+   int scan_ip = -1;
 
-         if (scan_ip <= live_intervals->start[var_to])
-            continue;
+   foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
+      scan_ip++;
 
-         if (scan_ip > live_intervals->end[var_to])
-            break;
+      if (scan_ip < start_ip)
+         continue;
 
-         if (scan_inst->dst.equals(inst->dst) ||
-             scan_inst->dst.equals(inst->src[0]))
-            return false;
-      }
+      if (scan_inst->is_control_flow())
+         return false;
+
+      if (scan_ip <= live_intervals->start[var_to])
+         continue;
+
+      if (scan_ip > live_intervals->end[var_to])
+         return true;
+
+      if (scan_inst->dst.equals(inst->dst) ||
+          scan_inst->dst.equals(inst->src[0]))
+         return false;
    }
 
    return true;
@@ -120,23 +146,28 @@ fs_visitor::register_coalesce()
    int src_size = 0;
    int channels_remaining = 0;
    int reg_from = -1, reg_to = -1;
-   int reg_to_offset[MAX_SAMPLER_MESSAGE_SIZE];
-   fs_inst *mov[MAX_SAMPLER_MESSAGE_SIZE];
-   int var_to[MAX_SAMPLER_MESSAGE_SIZE];
-   int var_from[MAX_SAMPLER_MESSAGE_SIZE];
+   int reg_to_offset[MAX_VGRF_SIZE];
+   fs_inst *mov[MAX_VGRF_SIZE];
+   int var_to[MAX_VGRF_SIZE];
+   int var_from[MAX_VGRF_SIZE];
 
-   foreach_list(node, &this->instructions) {
-      fs_inst *inst = (fs_inst *)node;
+   foreach_block_and_inst(block, fs_inst, inst, cfg) {
+      if (!is_coalesce_candidate(this, inst))
+         continue;
 
-      if (!is_coalesce_candidate(inst, virtual_grf_sizes))
+      if (is_nop_mov(inst)) {
+         inst->opcode = BRW_OPCODE_NOP;
+         progress = true;
          continue;
+      }
 
       if (reg_from != inst->src[0].reg) {
          reg_from = inst->src[0].reg;
 
-         src_size = virtual_grf_sizes[inst->src[0].reg];
-         assert(src_size <= MAX_SAMPLER_MESSAGE_SIZE);
+         src_size = alloc.sizes[inst->src[0].reg];
+         assert(src_size <= MAX_VGRF_SIZE);
 
+         assert(inst->src[0].width % 8 == 0);
          channels_remaining = src_size;
          memset(mov, 0, sizeof(mov));
 
@@ -146,20 +177,47 @@ fs_visitor::register_coalesce()
       if (reg_to != inst->dst.reg)
          continue;
 
-      const int offset = inst->src[0].reg_offset;
-      reg_to_offset[offset] = inst->dst.reg_offset;
-      mov[offset] = inst;
-      channels_remaining--;
+      if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
+         for (int i = 0; i < src_size; i++) {
+            reg_to_offset[i] = i;
+         }
+         mov[0] = inst;
+         channels_remaining -= inst->regs_written;
+      } else {
+         const int offset = inst->src[0].reg_offset;
+         if (mov[offset]) {
+            /* This is the second time that this offset in the register has
+             * been set.  This means, in particular, that inst->dst was
+             * live before this instruction and that the live ranges of
+             * inst->dst and inst->src[0] overlap and we can't coalesce the
+             * two variables.  Let's ensure that doesn't happen.
+             */
+            channels_remaining = -1;
+            continue;
+         }
+         reg_to_offset[offset] = inst->dst.reg_offset;
+         if (inst->src[0].width == 16)
+            reg_to_offset[offset + 1] = inst->dst.reg_offset + 1;
+         mov[offset] = inst;
+         channels_remaining -= inst->regs_written;
+      }
 
       if (channels_remaining)
          continue;
 
       bool can_coalesce = true;
       for (int i = 0; i < src_size; i++) {
+         if (reg_to_offset[i] != reg_to_offset[0] + i) {
+            /* Registers are out-of-order. */
+            can_coalesce = false;
+            reg_from = -1;
+            break;
+         }
+
          var_to[i] = live_intervals->var_from_vgrf[reg_to] + reg_to_offset[i];
          var_from[i] = live_intervals->var_from_vgrf[reg_from] + i;
 
-         if (!can_coalesce_vars(live_intervals, &instructions, inst,
+         if (!can_coalesce_vars(live_intervals, cfg, inst,
                                 var_to[i], var_from[i])) {
             can_coalesce = false;
             reg_from = -1;
@@ -170,29 +228,30 @@ fs_visitor::register_coalesce()
       if (!can_coalesce)
          continue;
 
+      progress = true;
+      bool was_load_payload = inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD;
+
       for (int i = 0; i < src_size; i++) {
          if (mov[i]) {
             mov[i]->opcode = BRW_OPCODE_NOP;
             mov[i]->conditional_mod = BRW_CONDITIONAL_NONE;
             mov[i]->dst = reg_undef;
-            mov[i]->src[0] = reg_undef;
-            mov[i]->src[1] = reg_undef;
-            mov[i]->src[2] = reg_undef;
+            for (int j = 0; j < mov[i]->sources; j++) {
+               mov[i]->src[j] = reg_undef;
+            }
          }
       }
 
-      foreach_list(node, &this->instructions) {
-         fs_inst *scan_inst = (fs_inst *)node;
-
+      foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
          for (int i = 0; i < src_size; i++) {
-            if (mov[i]) {
+            if (mov[i] || was_load_payload) {
                if (scan_inst->dst.file == GRF &&
                    scan_inst->dst.reg == reg_from &&
                    scan_inst->dst.reg_offset == i) {
                   scan_inst->dst.reg = reg_to;
                   scan_inst->dst.reg_offset = reg_to_offset[i];
                }
-               for (int j = 0; j < 3; j++) {
+               for (int j = 0; j < scan_inst->sources; j++) {
                   if (scan_inst->src[j].file == GRF &&
                       scan_inst->src[j].reg == reg_from &&
                       scan_inst->src[j].reg_offset == i) {
@@ -215,17 +274,15 @@ fs_visitor::register_coalesce()
       reg_from = -1;
    }
 
-   foreach_list_safe(node, &this->instructions) {
-      fs_inst *inst = (fs_inst *)node;
-
-      if (inst->opcode == BRW_OPCODE_NOP) {
-         inst->remove();
-         progress = true;
+   if (progress) {
+      foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) {
+         if (inst->opcode == BRW_OPCODE_NOP) {
+            inst->remove(block);
+         }
       }
-   }
 
-   if (progress)
       invalidate_live_intervals();
+   }
 
    return progress;
 }