i965/fs: detect different bit size accesses to uniforms to push them in proper locations
authorSamuel Iglesias Gonsálvez <siglesias@igalia.com>
Mon, 20 Feb 2017 10:03:46 +0000 (11:03 +0100)
committerSamuel Iglesias Gonsálvez <siglesias@igalia.com>
Wed, 1 Mar 2017 05:50:29 +0000 (06:50 +0100)
Previously, if we had accesses with different sizes to the same uniform, we might not
push it aligned with the bigger one. This is a problem in BSW/BXT when we access
an array of DF uniform with both direct and indirect addressing because for the latter
we use 32-bit MOV INDIRECT instructions. However this problem can happen with other
generations and bitsizes.

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
src/mesa/drivers/dri/i965/brw_fs.cpp

index c713caa9b6f4448739e682e7b9d3b7b31b7d0223..68e73cc5cd801a672bbb73e884444c5f9e6556d6 100644 (file)
@@ -1853,7 +1853,10 @@ fs_visitor::compact_virtual_grfs()
 }
 
 static void
-set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
+set_push_pull_constant_loc(unsigned uniform, int *chunk_start,
+                           unsigned *max_chunk_bitsize,
+                           bool contiguous, unsigned bitsize,
+                           const unsigned target_bitsize,
                            int *push_constant_loc, int *pull_constant_loc,
                            unsigned *num_push_constants,
                            unsigned *num_pull_constants,
@@ -1865,11 +1868,23 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
    if (*chunk_start < 0)
       *chunk_start = uniform;
 
+   /* Keep track of the maximum bit size access in contiguous uniforms */
+   *max_chunk_bitsize = MAX2(*max_chunk_bitsize, bitsize);
+
    /* If this element does not need to be contiguous with the next, we
     * split at this point and everything between chunk_start and u forms a
     * single chunk.
     */
    if (!contiguous) {
+      /* If bitsize doesn't match the target one, skip it */
+      if (*max_chunk_bitsize != target_bitsize) {
+         /* FIXME: right now we only support 32 and 64-bit accesses */
+         assert(*max_chunk_bitsize == 4 || *max_chunk_bitsize == 8);
+         *max_chunk_bitsize = 0;
+         *chunk_start = -1;
+         return;
+      }
+
       unsigned chunk_size = uniform - *chunk_start + 1;
 
       /* Decide whether we should push or pull this parameter.  In the
@@ -1887,6 +1902,7 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
             pull_constant_loc[j] = (*num_pull_constants)++;
       }
 
+      *max_chunk_bitsize = 0;
       *chunk_start = -1;
    }
 }
@@ -1909,8 +1925,8 @@ fs_visitor::assign_constant_locations()
 
    bool is_live[uniforms];
    memset(is_live, 0, sizeof(is_live));
-   bool is_live_64bit[uniforms];
-   memset(is_live_64bit, 0, sizeof(is_live_64bit));
+   unsigned bitsize_access[uniforms];
+   memset(bitsize_access, 0, sizeof(bitsize_access));
 
    /* For each uniform slot, a value of true indicates that the given slot and
     * the next slot must remain contiguous.  This is used to keep us from
@@ -1947,23 +1963,18 @@ fs_visitor::assign_constant_locations()
             for (unsigned j = constant_nr; j < last; j++) {
                is_live[j] = true;
                contiguous[j] = true;
-               if (type_sz(inst->src[i].type) == 8) {
-                  is_live_64bit[j] = true;
-               }
+               bitsize_access[j] = MAX2(bitsize_access[j], type_sz(inst->src[i].type));
             }
             is_live[last] = true;
-            if (type_sz(inst->src[i].type) == 8) {
-                  is_live_64bit[last] = true;
-            }
+            bitsize_access[last] = MAX2(bitsize_access[last], type_sz(inst->src[i].type));
          } else {
             if (constant_nr >= 0 && constant_nr < (int) uniforms) {
                int regs_read = inst->components_read(i) *
                   type_sz(inst->src[i].type) / 4;
                for (int j = 0; j < regs_read; j++) {
                   is_live[constant_nr + j] = true;
-                  if (type_sz(inst->src[i].type) == 8) {
-                     is_live_64bit[constant_nr + j] = true;
-                  }
+                  bitsize_access[constant_nr + j] =
+                     MAX2(bitsize_access[constant_nr + j], type_sz(inst->src[i].type));
                }
             }
          }
@@ -2002,13 +2013,17 @@ fs_visitor::assign_constant_locations()
    memset(pull_constant_loc, -1, uniforms * sizeof(*pull_constant_loc));
 
    int chunk_start = -1;
+   unsigned max_chunk_bitsize = 0;
 
    /* First push 64-bit uniforms to ensure they are properly aligned */
+   const unsigned uniform_64_bit_size = type_sz(BRW_REGISTER_TYPE_DF);
    for (unsigned u = 0; u < uniforms; u++) {
-      if (!is_live[u] || !is_live_64bit[u])
+      if (!is_live[u])
          continue;
 
-      set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
+      set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
+                                 contiguous[u], bitsize_access[u],
+                                 uniform_64_bit_size,
                                  push_constant_loc, pull_constant_loc,
                                  &num_push_constants, &num_pull_constants,
                                  max_push_components, max_chunk_size,
@@ -2017,15 +2032,18 @@ fs_visitor::assign_constant_locations()
    }
 
    /* Then push the rest of uniforms */
+   const unsigned uniform_32_bit_size = type_sz(BRW_REGISTER_TYPE_F);
    for (unsigned u = 0; u < uniforms; u++) {
-      if (!is_live[u] || is_live_64bit[u])
+      if (!is_live[u])
          continue;
 
       /* Skip thread_local_id_index to put it in the last push register. */
       if (thread_local_id_index == (int)u)
          continue;
 
-      set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
+      set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
+                                 contiguous[u], bitsize_access[u],
+                                 uniform_32_bit_size,
                                  push_constant_loc, pull_constant_loc,
                                  &num_push_constants, &num_pull_constants,
                                  max_push_components, max_chunk_size,