From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 9 Sep 2016 10:21:06 +0000 (+0200)
Subject: i965/vec4: avoid spilling of registers that mix 32-bit and 64-bit access
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8843c43f7e85423b559383b38c77477139b4b06e;p=mesa.git

i965/vec4: avoid spilling of registers that mix 32-bit and 64-bit access

When 64-bit registers are (un)spilled, we need to execute data shuffling
code before writing to or after reading from memory. If we have instructions
that operate on 64-bit data via 32-bit instructions, (un)spills for the
register produced by 32-bit instructions will not do data shuffling at all
(because we only see a normal 32-bit istruction seemingly operating on
32-bit data). This means that subsequent reads with that register using DF
access will unshuffle data read from memory that was never adequately
shuffled when it was written.

Fixing this would require to identify which 32-bit instructions write
64-bit data and emit spill instructions only when the full 64-bit
data has been written (by multiple 32-bit instructions writing to different
offsets of the same register) and always emit 64-bit unspills whenever
64-bit data is read, even when the instruction uses a 32-bit type to read
from them.

Reviewed-by: Matt Turner <mattst88@gmail.com>
---

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 242e6664623..79fd15b12bb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -374,9 +374,13 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
 {
    float loop_scale = 1.0;
 
+   unsigned *reg_type_size = (unsigned *)
+      ralloc_size(NULL, this->alloc.count * sizeof(unsigned));
+
    for (unsigned i = 0; i < this->alloc.count; i++) {
       spill_costs[i] = 0.0;
       no_spill[i] = alloc.sizes[i] != 1 && alloc.sizes[i] != 2;
+      reg_type_size[i] = 0;
    }
 
    /* Calculate costs for spilling nodes.  Call it a cost of 1 per
@@ -406,6 +410,15 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
                if (type_sz(inst->src[i].type) == 8 && inst->exec_size != 8)
                   no_spill[inst->src[i].nr] = true;
             }
+
+            /* We can't spill registers that mix 32-bit and 64-bit access (that
+             * contain 64-bit data that is operated on via 32-bit instructions)
+             */
+            unsigned type_size = type_sz(inst->src[i].type);
+            if (reg_type_size[inst->src[i].nr] == 0)
+               reg_type_size[inst->src[i].nr] = type_size;
+            else if (reg_type_size[inst->src[i].nr] != type_size)
+               no_spill[inst->src[i].nr] = true;
          }
       }
 
@@ -422,6 +435,15 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
           */
          if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8)
             no_spill[inst->dst.nr] = true;
+
+         /* We can't spill registers that mix 32-bit and 64-bit access (that
+          * contain 64-bit data that is operated on via 32-bit instructions)
+          */
+         unsigned type_size = type_sz(inst->dst.type);
+         if (reg_type_size[inst->dst.nr] == 0)
+            reg_type_size[inst->dst.nr] = type_size;
+         else if (reg_type_size[inst->dst.nr] != type_size)
+            no_spill[inst->dst.nr] = true;
       }
 
       switch (inst->opcode) {
@@ -448,6 +470,8 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
          break;
       }
    }
+
+   ralloc_free(reg_type_size);
 }
 
 int