i965/fs: Make resolve_source_modifiers consistent with the vec4 version
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_register_coalesce.cpp
index 73f18f9d62b8515c696d57df2057cc534bd93653..72e873857ced950272a885b15db2914b50ba3e2c 100644 (file)
@@ -63,25 +63,6 @@ is_nop_mov(const fs_inst *inst)
    return false;
 }
 
-static bool
-is_copy_payload(const fs_visitor *v, const fs_inst *inst)
-{
-   if (v->virtual_grf_sizes[inst->src[0].reg] != inst->regs_written)
-      return false;
-
-   const int reg = inst->src[0].reg;
-   if (inst->src[0].reg_offset != 0)
-      return false;
-
-   for (int i = 1; i < inst->sources; i++) {
-      if (inst->src[i].reg != reg ||
-          inst->src[i].reg_offset != i) {
-         return false;
-      }
-   }
-   return true;
-}
-
 static bool
 is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
 {
@@ -98,12 +79,12 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
       return false;
    }
 
-   if (v->virtual_grf_sizes[inst->src[0].reg] >
-       v->virtual_grf_sizes[inst->dst.reg])
+   if (v->alloc.sizes[inst->src[0].reg] >
+       v->alloc.sizes[inst->dst.reg])
       return false;
 
    if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
-      if (!is_copy_payload(v, inst)) {
+      if (!inst->is_copy_payload(v->alloc)) {
          return false;
       }
    }
@@ -165,10 +146,10 @@ fs_visitor::register_coalesce()
    int src_size = 0;
    int channels_remaining = 0;
    int reg_from = -1, reg_to = -1;
-   int reg_to_offset[MAX_SAMPLER_MESSAGE_SIZE];
-   fs_inst *mov[MAX_SAMPLER_MESSAGE_SIZE];
-   int var_to[MAX_SAMPLER_MESSAGE_SIZE];
-   int var_from[MAX_SAMPLER_MESSAGE_SIZE];
+   int reg_to_offset[MAX_VGRF_SIZE];
+   fs_inst *mov[MAX_VGRF_SIZE];
+   int var_to[MAX_VGRF_SIZE];
+   int var_from[MAX_VGRF_SIZE];
 
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       if (!is_coalesce_candidate(this, inst))
@@ -183,8 +164,8 @@ fs_visitor::register_coalesce()
       if (reg_from != inst->src[0].reg) {
          reg_from = inst->src[0].reg;
 
-         src_size = virtual_grf_sizes[inst->src[0].reg];
-         assert(src_size <= MAX_SAMPLER_MESSAGE_SIZE);
+         src_size = alloc.sizes[inst->src[0].reg];
+         assert(src_size <= MAX_VGRF_SIZE);
 
          channels_remaining = src_size;
          memset(mov, 0, sizeof(mov));
@@ -200,12 +181,24 @@ fs_visitor::register_coalesce()
             reg_to_offset[i] = i;
          }
          mov[0] = inst;
-         channels_remaining -= inst->sources;
+         channels_remaining -= inst->regs_written;
       } else {
          const int offset = inst->src[0].reg_offset;
+         if (mov[offset]) {
+            /* This is the second time that this offset in the register has
+             * been set.  This means, in particular, that inst->dst was
+             * live before this instruction, so the live ranges of inst->dst
+             * and inst->src[0] overlap and the two variables can't be
+             * coalesced.  Poison the channel count so it never reaches zero.
+             */
+            channels_remaining = -1;
+            continue;
+         }
          reg_to_offset[offset] = inst->dst.reg_offset;
+         if (inst->regs_written > 1)
+            reg_to_offset[offset + 1] = inst->dst.reg_offset + 1;
          mov[offset] = inst;
-         channels_remaining--;
+         channels_remaining -= inst->regs_written;
       }
 
       if (channels_remaining)
@@ -213,6 +206,13 @@ fs_visitor::register_coalesce()
 
       bool can_coalesce = true;
       for (int i = 0; i < src_size; i++) {
+         if (reg_to_offset[i] != reg_to_offset[0] + i) {
+            /* Registers are out-of-order. */
+            can_coalesce = false;
+            reg_from = -1;
+            break;
+         }
+
          var_to[i] = live_intervals->var_from_vgrf[reg_to] + reg_to_offset[i];
          var_from[i] = live_intervals->var_from_vgrf[reg_from] + i;
 
@@ -228,7 +228,6 @@ fs_visitor::register_coalesce()
          continue;
 
       progress = true;
-      bool was_load_payload = inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD;
 
       for (int i = 0; i < src_size; i++) {
          if (mov[i]) {
@@ -242,22 +241,19 @@ fs_visitor::register_coalesce()
       }
 
       foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
-         for (int i = 0; i < src_size; i++) {
-            if (mov[i] || was_load_payload) {
-               if (scan_inst->dst.file == GRF &&
-                   scan_inst->dst.reg == reg_from &&
-                   scan_inst->dst.reg_offset == i) {
-                  scan_inst->dst.reg = reg_to;
-                  scan_inst->dst.reg_offset = reg_to_offset[i];
-               }
-               for (int j = 0; j < scan_inst->sources; j++) {
-                  if (scan_inst->src[j].file == GRF &&
-                      scan_inst->src[j].reg == reg_from &&
-                      scan_inst->src[j].reg_offset == i) {
-                     scan_inst->src[j].reg = reg_to;
-                     scan_inst->src[j].reg_offset = reg_to_offset[i];
-                  }
-               }
+         if (scan_inst->dst.file == GRF &&
+             scan_inst->dst.reg == reg_from) {
+            scan_inst->dst.reg = reg_to;
+            scan_inst->dst.reg_offset =
+               reg_to_offset[scan_inst->dst.reg_offset];
+         }
+
+         for (int j = 0; j < scan_inst->sources; j++) {
+            if (scan_inst->src[j].file == GRF &&
+                scan_inst->src[j].reg == reg_from) {
+               scan_inst->src[j].reg = reg_to;
+               scan_inst->src[j].reg_offset =
+                  reg_to_offset[scan_inst->src[j].reg_offset];
             }
          }
       }