nir/find_array_copies: Handle wildcards and overlapping copies
author     Connor Abbott <cwabbott0@gmail.com>
           Tue, 18 Jun 2019 10:12:49 +0000 (12:12 +0200)
committer  Connor Abbott <cwabbott0@gmail.com>
           Mon, 29 Jul 2019 09:36:25 +0000 (11:36 +0200)
This commit rewrites opt_find_array_copies to handle an array copy
sequence even when other operations are interleaved with it. In
particular, this handles the case where we OpLoad an array of structs
and then OpStore it, which generates code like:

foo[0].a = bar[0].a
foo[0].b = bar[0].b
foo[1].a = bar[1].a
foo[1].b = bar[1].b
...

a pattern that the previous pass didn't recognize.
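
Because the rewritten pass tracks each leaf of the copy separately, a
sequence like the one above can now be collapsed into per-member
wildcard copies, roughly (a sketch of the emitted copy_derefs):

foo[*].a = bar[*].a
foo[*].b = bar[*].b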

In order to correctly handle copying arrays of arrays, and in
particular copies involving wildcards, we need to use a tree structure
similar to the one in lower_vars_to_ssa, so that we can walk all the
partial array copies invalidated by a particular write, including ones
where one of the common indices is a wildcard. I don't think a
hash-table-based approach would end up much smaller anyway, once the
hashing/comparing code it needs is factored in.
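
For example (a sketch of the aliasing case the tree walk handles; the
same situation is spelled out in a comment in handle_write below):

dst[0][*] = src[0][*]
dst[0][0] = x    <- aliases the in-progress copy dst[*][*] = src[*][*]
dst[1][*] = src[1][*]

The clobbering write is caught while processing the third line, by
visiting every node aliasing the destination (including the wildcard
node for dst[*][*]) and comparing the last_overwritten and
last_successful_write timestamps.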

All of the changes come from tessellation control shaders in Strange
Brigade, where we're able to remove the DXVK-inserted copy at the
beginning of the shader. These are the results for radv:

Totals from affected shaders:
SGPRS: 4576 -> 4576 (0.00 %)
VGPRS: 13784 -> 5560 (-59.66 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 8696 -> 6876 (-20.93 %) dwords per thread
Code Size: 329940 -> 263268 (-20.21 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 330 -> 898 (172.12 %)
Wait states: 0 -> 0 (0.00 %)

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/compiler/nir/nir.h
src/compiler/nir/nir_deref.c
src/compiler/nir/nir_opt_find_array_copies.c

index b011f868bc2c37557e0c2d29cbe9fef143a4c0d7..a3c44ff988e219200e35694fdbdb81fb73dfe756 100644
@@ -1223,6 +1223,7 @@ nir_deref_instr_get_variable(const nir_deref_instr *instr)
 }
 
 bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
+bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);
 bool nir_deref_instr_has_complex_use(nir_deref_instr *instr);
 
 bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);
index cdb9105a751fac4f9a685dcb8ee369915e490745..804ec99fa715caae7b50c2d15c929d9623b74b5f 100644
@@ -121,6 +121,24 @@ nir_deref_instr_has_indirect(nir_deref_instr *instr)
    return false;
 }
 
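+/* Returns true if any constant array index along the deref chain is known
+ * to be out of bounds for its array type. Such an access is undefined, so
+ * callers (e.g. nir_opt_find_array_copies) can simply skip it.
+ */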
+bool
+nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
+{
+   for (; instr; instr = nir_deref_instr_parent(instr)) {
+      if (instr->deref_type == nir_deref_type_array &&
+          nir_src_is_const(instr->arr.index) &&
+          nir_src_as_uint(instr->arr.index) >=
+          glsl_get_length(nir_deref_instr_parent(instr)->type))
+         return true;
+   }
+
+   return false;
+}
+
 bool
 nir_deref_instr_has_complex_use(nir_deref_instr *deref)
 {
index 1573cce83748652a42691e136cd0883c7957b14a..28f77335d998592c3ec64abd82348abf50767f8b 100644
 #include "nir_builder.h"
 #include "nir_deref.h"
 
-static bool
-index_ssa_def_cb(nir_ssa_def *def, void *state)
+struct match_node {
+   /* Note: these fields are only valid for leaf nodes */
+
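+   /* The next constant array index we expect this copy sequence to write;
+    * equivalently, the number of array elements matched so far.
+    */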
+   unsigned next_array_idx;
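+
+   /* Index into first_src_path of the array deref whose index advances
+    * with the copy, or -1 until the first non-trivial match pins it down
+    * (see try_match_deref).
+    */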
+   int src_wildcard_idx;
+   nir_deref_path first_src_path;
+
+   /* The index of the first read of the source path that's part of the copy
+    * we're matching. If the last write to the source path is after this, we
+    * would get a different result from reading it at the end and we can't
+    * emit the copy.
+    */
+   unsigned first_src_read;
+
+   /* The last time there was a write to this node. */
+   unsigned last_overwritten;
+
+   /* The last time there was a write to this node which successfully advanced
+    * next_array_idx. This helps us catch any intervening aliased writes.
+    */
+   unsigned last_successful_write;
+
+   unsigned num_children;
+   struct match_node *children[];
+};
+
+struct match_state {
+   /* Map from nir_variable * -> match_node */
+   struct hash_table *table;
+
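+   /* Index of the instruction currently being processed; used as the
+    * timestamp recorded by clobber() in last_overwritten.
+    */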
+   unsigned cur_instr;
+
+   nir_builder builder;
+
+   void *dead_ctx;
+};
+
+static struct match_node *
+create_match_node(const struct glsl_type *type, struct match_state *state)
 {
-   unsigned *index = (unsigned *) state;
-   def->index = (*index)++;
+   unsigned num_children = 0;
+   if (glsl_type_is_array_or_matrix(type)) {
+      /* One for wildcards */
+      num_children = glsl_get_length(type) + 1;
+   } else if (glsl_type_is_struct_or_ifc(type)) {
+      num_children = glsl_get_length(type);
+   }
 
-   return true;
+   struct match_node *node = rzalloc_size(state->dead_ctx,
+                                          sizeof(struct match_node) +
+                                          num_children * sizeof(struct match_node *));
+   node->num_children = num_children;
+   node->src_wildcard_idx = -1;
+   node->first_src_read = UINT32_MAX;
+   return node;
 }
 
-static nir_deref_instr *
-get_deref_for_load_src(nir_src src, unsigned first_valid_load)
+static struct match_node *
+node_for_deref(nir_deref_instr *instr, struct match_node *parent,
+               struct match_state *state)
 {
-   nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
-   if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
-      return NULL;
+   unsigned idx;
+   switch (instr->deref_type) {
+   case nir_deref_type_var: {
+      struct hash_entry *entry = _mesa_hash_table_search(state->table, instr->var);
+      if (entry) {
+         return entry->data;
+      } else {
+         struct match_node *node = create_match_node(instr->type, state);
+         _mesa_hash_table_insert(state->table, instr->var, node);
+         return node;
+      }
+   }
+
+   case nir_deref_type_array_wildcard:
+      idx = glsl_get_length(nir_deref_instr_parent(instr)->type);
+      break;
+
+   case nir_deref_type_array:
+      if (nir_src_is_const(instr->arr.index)) {
+         idx = nir_src_as_uint(instr->arr.index);
+      } else {
+         idx = glsl_get_length(nir_deref_instr_parent(instr)->type);
+      }
+      break;
+
+   case nir_deref_type_struct:
+      idx = instr->strct.index;
+      break;
 
-   if (load->dest.ssa.index < first_valid_load)
-      return NULL;
+   default:
+      unreachable("bad deref type");
+   }
 
-   return nir_src_as_deref(load->src[0]);
+   assert(idx < parent->num_children);
+   if (parent->children[idx]) {
+      return parent->children[idx];
+   } else {
+      struct match_node *node = create_match_node(instr->type, state);
+      parent->children[idx] = node;
+      return node;
+   }
 }
 
-struct match_state {
-   /* Index into the array of the last copy or -1 for no ongoing copy. */
-   unsigned next_array_idx;
+static struct match_node *
+node_for_wildcard(const struct glsl_type *type, struct match_node *parent,
+                  struct match_state *state)
+{
+   assert(glsl_type_is_array_or_matrix(type));
+   unsigned idx = glsl_get_length(type);
+
+   if (parent->children[idx]) {
+      return parent->children[idx];
+   } else {
+      struct match_node *node =
+         create_match_node(glsl_get_array_element(type), state);
+      parent->children[idx] = node;
+      return node;
+   }
+}
 
-   /* Length of the array we're copying */
-   unsigned array_len;
+static struct match_node *
+node_for_path(nir_deref_path *path, struct match_state *state)
+{
+   struct match_node *node = NULL;
+   for (nir_deref_instr **instr = path->path; *instr; instr++)
+      node = node_for_deref(*instr, node, state);
 
-   /* Index into the deref path to the array we think is being copied */
-   int src_deref_array_idx;
-   int dst_deref_array_idx;
+   return node;
+}
 
-   /* Deref paths of the first load/store pair or copy */
-   nir_deref_path first_src_path;
-   nir_deref_path first_dst_path;
-};
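+/* Like node_for_path, but the deref at wildcard_idx in the path is entered
+ * through the wildcard child instead of its actual index.
+ */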
+static struct match_node *
+node_for_path_with_wildcard(nir_deref_path *path, unsigned wildcard_idx,
+                            struct match_state *state)
+{
+   struct match_node *node = NULL;
+   unsigned idx = 0;
+   for (nir_deref_instr **instr = path->path; *instr; instr++, idx++) {
+      if (idx == wildcard_idx)
+         node = node_for_wildcard((*(instr - 1))->type, node, state);
+      else
+         node = node_for_deref(*instr, node, state);
+   }
+
+   return node;
+}
+
+typedef void (*match_cb)(struct match_node *, struct match_state *);
 
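+/* Walk the match tree along a deref path, invoking cb on every node that
+ * could alias the path: a constant index visits both its own child and the
+ * wildcard slot, while wildcards and indirect indices visit all children.
+ */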
 static void
-match_state_init(struct match_state *state)
+_foreach_aliasing(nir_deref_instr **deref, match_cb cb,
+                  struct match_node *node, struct match_state *state)
 {
-   state->next_array_idx = 0;
-   state->array_len = 0;
-   state->src_deref_array_idx = -1;
-   state->dst_deref_array_idx = -1;
+   if (*deref == NULL) {
+      cb(node, state);
+      return;
+   }
+
+   switch ((*deref)->deref_type) {
+   case nir_deref_type_struct: {
+      struct match_node *child = node->children[(*deref)->strct.index];
+      if (child)
+         _foreach_aliasing(deref + 1, cb, child, state);
+      return;
+   }
+
+   case nir_deref_type_array:
+   case nir_deref_type_array_wildcard: {
+      if ((*deref)->deref_type == nir_deref_type_array_wildcard ||
+          !nir_src_is_const((*deref)->arr.index)) {
+         /* This access may touch any index, so we have to visit all of
+          * them.
+          */
+         for (unsigned i = 0; i < node->num_children; i++) {
+            if (node->children[i])
+               _foreach_aliasing(deref + 1, cb, node->children[i], state);
+         }
+      } else {
+         /* Visit the wildcard entry if any */
+         if (node->children[node->num_children - 1]) {
+            _foreach_aliasing(deref + 1, cb,
+                              node->children[node->num_children - 1], state);
+         }
+
+         unsigned index = nir_src_as_uint((*deref)->arr.index);
+         /* Check that the index is in-bounds */
+         if (index < node->num_children - 1 && node->children[index])
+            _foreach_aliasing(deref + 1, cb, node->children[index], state);
+      }
+      return;
+   }
+
+   default:
+      unreachable("bad deref type");
+   }
 }
 
+/* Given a deref path, find all the leaf deref nodes that alias it. */
+
 static void
-match_state_finish(struct match_state *state)
+foreach_aliasing_node(nir_deref_path *path,
+                      match_cb cb,
+                      struct match_state *state)
 {
-   if (state->next_array_idx > 0) {
-      nir_deref_path_finish(&state->first_src_path);
-      nir_deref_path_finish(&state->first_dst_path);
-   }
+   assert(path->path[0]->deref_type == nir_deref_type_var);
+   struct hash_entry *entry = _mesa_hash_table_search(state->table,
+                                                      path->path[0]->var);
+   if (entry)
+      _foreach_aliasing(&path->path[1], cb, entry->data, state);
+}
+
+static nir_deref_instr *
+build_wildcard_deref(nir_builder *b, nir_deref_path *path,
+                     unsigned wildcard_idx)
+{
+   assert(path->path[wildcard_idx]->deref_type == nir_deref_type_array);
+
+   nir_deref_instr *tail =
+      nir_build_deref_array_wildcard(b, path->path[wildcard_idx - 1]);
+
+   for (unsigned i = wildcard_idx + 1; path->path[i]; i++)
+      tail = nir_build_deref_follower(b, tail, path->path[i]);
+
+   return tail;
 }
 
 static void
-match_state_reset(struct match_state *state)
+clobber(struct match_node *node, struct match_state *state)
 {
-   match_state_finish(state);
-   match_state_init(state);
+   node->last_overwritten = state->cur_instr;
 }
 
 static bool
 try_match_deref(nir_deref_path *base_path, int *path_array_idx,
-                nir_deref_instr *deref, int arr_idx, void *mem_ctx)
+                nir_deref_path *deref_path, int arr_idx)
 {
-   nir_deref_path deref_path;
-   nir_deref_path_init(&deref_path, deref, mem_ctx);
-
-   bool found = false;
    for (int i = 0; ; i++) {
       nir_deref_instr *b = base_path->path[i];
-      nir_deref_instr *d = deref_path.path[i];
+      nir_deref_instr *d = deref_path->path[i];
       /* They have to be the same length */
       if ((b == NULL) != (d == NULL))
-         goto fail;
+         return false;
 
       if (b == NULL)
          break;
 
       /* This can happen if one is a deref_array and the other a wildcard */
       if (b->deref_type != d->deref_type)
-         goto fail;
+         return false;
 
       switch (b->deref_type) {
       case nir_deref_type_var:
          if (b->var != d->var)
-            goto fail;
+            return false;
          continue;
 
       case nir_deref_type_array:
@@ -137,7 +302,7 @@ try_match_deref(nir_deref_path *base_path, int *path_array_idx,
 
          /* We're at the array index but not a candidate */
          if (*path_array_idx == i)
-            goto fail;
+            return false;
 
          /* If we're not the path array index, we must match exactly.  We
           * could probably just compare SSA values and trust in copy
@@ -148,14 +313,14 @@ try_match_deref(nir_deref_path *base_path, int *path_array_idx,
              (const_b_idx && const_d_idx && b_idx == d_idx))
             continue;
 
-         goto fail;
+         return false;
 
       case nir_deref_type_array_wildcard:
          continue;
 
       case nir_deref_type_struct:
          if (b->strct.index != d->strct.index)
-            goto fail;
+            return false;
          continue;
 
       default:
@@ -166,49 +331,161 @@ try_match_deref(nir_deref_path *base_path, int *path_array_idx,
    /* If we got here without failing, we've matched.  However, it isn't an
     * array match unless we found an altered array index.
     */
-   found = *path_array_idx > 0;
+   return *path_array_idx > 0;
+}
 
-fail:
-   nir_deref_path_finish(&deref_path);
-   return found;
+static void
+handle_read(nir_deref_instr *src, struct match_state *state)
+{
+   /* We only need to create an entry for sources that might be used to form
+    * an array copy. Hence no indirects or indexing into a vector.
+    */
+   if (nir_deref_instr_has_indirect(src) ||
+       nir_deref_instr_is_known_out_of_bounds(src) ||
+       (src->deref_type == nir_deref_type_array &&
+        glsl_type_is_vector(nir_src_as_deref(src->parent)->type)))
+      return;
+
+   nir_deref_path src_path;
+   nir_deref_path_init(&src_path, src, state->dead_ctx);
+
+   /* Create a node for this source if it doesn't exist. The point of this is
+    * to know which nodes aliasing a given store we actually need to care
+    * about, to avoid creating an excessive amount of nodes.
+    */
+   node_for_path(&src_path, state);
 }
 
-static nir_deref_instr *
-build_wildcard_deref(nir_builder *b, nir_deref_path *path,
-                     unsigned wildcard_idx)
+/* The core implementation, which is used for both copies and writes. Return
+ * true if a copy is created.
+ */
+static bool
+handle_write(nir_deref_instr *dst, nir_deref_instr *src,
+             unsigned write_index, unsigned read_index,
+             struct match_state *state)
 {
-   assert(path->path[wildcard_idx]->deref_type == nir_deref_type_array);
+   nir_builder *b = &state->builder;
 
-   nir_deref_instr *tail =
-      nir_build_deref_array_wildcard(b, path->path[wildcard_idx - 1]);
+   nir_deref_path dst_path;
+   nir_deref_path_init(&dst_path, dst, state->dead_ctx);
 
-   for (unsigned i = wildcard_idx + 1; path->path[i]; i++)
-      tail = nir_build_deref_follower(b, tail, path->path[i]);
+   unsigned idx = 0;
+   for (nir_deref_instr **instr = dst_path.path; *instr; instr++, idx++) {
+      if ((*instr)->deref_type != nir_deref_type_array)
+         continue;
 
-   return tail;
+      /* Get the entry where the index is replaced by a wildcard, so that we
+       * hopefully can keep matching an array copy.
+       */
+      struct match_node *dst_node =
+         node_for_path_with_wildcard(&dst_path, idx, state);
+
+      if (!src)
+         goto reset;
+
+      if (nir_src_as_uint((*instr)->arr.index) != dst_node->next_array_idx)
+         goto reset;
+
+      if (dst_node->next_array_idx == 0) {
+         /* At this point there may be multiple source indices which are zero,
+          * so we can't pin down the actual source index. Just store it and
+          * move on.
+          */
+         nir_deref_path_init(&dst_node->first_src_path, src, state->dead_ctx);
+      } else {
+         nir_deref_path src_path;
+         nir_deref_path_init(&src_path, src, state->dead_ctx);
+         bool result = try_match_deref(&dst_node->first_src_path,
+                                       &dst_node->src_wildcard_idx,
+                                       &src_path, dst_node->next_array_idx);
+         nir_deref_path_finish(&src_path);
+         if (!result)
+            goto reset;
+      }
+
+      /* Check if an aliasing write clobbered the array after the last normal
+       * write. For example, with a sequence like this:
+       *
+       * dst[0][*] = src[0][*];
+       * dst[0][0] = 0; // invalidates the array copy dst[*][*] = src[*][*]
+       * dst[1][*] = src[1][*];
+       *
+       * Note that the second write wouldn't reset the entry for dst[*][*]
+       * by itself, but it'll be caught by this check when processing the
+       * third copy.
+       */
+      if (dst_node->last_successful_write < dst_node->last_overwritten)
+         goto reset;
+
+      dst_node->last_successful_write = write_index;
+
+      /* In this case we've successfully processed an array element. Check if
+       * this is the last, so that we can emit an array copy.
+       */
+      dst_node->next_array_idx++;
+      dst_node->first_src_read = MIN2(dst_node->first_src_read, read_index);
+      if (dst_node->next_array_idx > 1 &&
+          dst_node->next_array_idx == glsl_get_length((*(instr - 1))->type)) {
+         /* Make sure that nothing was overwritten. */
+         struct match_node *src_node =
+            node_for_path_with_wildcard(&dst_node->first_src_path,
+                                        dst_node->src_wildcard_idx,
+                                        state);
+
+         if (src_node->last_overwritten <= dst_node->first_src_read) {
+            nir_copy_deref(b, build_wildcard_deref(b, &dst_path, idx),
+                              build_wildcard_deref(b, &dst_node->first_src_path,
+                                                   dst_node->src_wildcard_idx));
+            foreach_aliasing_node(&dst_path, clobber, state);
+            return true;
+         }
+      } else {
+         continue;
+      }
+
+reset:
+      dst_node->next_array_idx = 0;
+      dst_node->src_wildcard_idx = -1;
+      dst_node->last_successful_write = 0;
+      dst_node->first_src_read = UINT32_MAX;
+   }
+
+   /* Mark everything aliasing dst_path as clobbered. This needs to happen
+    * last since in the loop above we need to know what last clobbered
+    * dst_node and this overwrites that.
+    */
+   foreach_aliasing_node(&dst_path, clobber, state);
+
+   return false;
 }
 
 static bool
 opt_find_array_copies_block(nir_builder *b, nir_block *block,
-                            unsigned *num_ssa_defs, void *mem_ctx)
+                            struct match_state *state)
 {
    bool progress = false;
 
-   struct match_state s;
-   match_state_init(&s);
+   unsigned next_index = 0;
 
-   nir_variable *dst_var = NULL;
-   unsigned prev_dst_var_last_write = *num_ssa_defs;
-   unsigned dst_var_last_write = *num_ssa_defs;
+   _mesa_hash_table_clear(state->table, NULL);
 
    nir_foreach_instr(instr, block) {
-      /* Index the SSA defs before we do anything else. */
-      nir_foreach_ssa_def(instr, index_ssa_def_cb, num_ssa_defs);
-
       if (instr->type != nir_instr_type_intrinsic)
          continue;
 
+      /* Index the instructions before we do anything else. */
+      instr->index = next_index++;
+
+      /* Save the index of this instruction */
+      state->cur_instr = instr->index;
+
       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+      if (intrin->intrinsic == nir_intrinsic_load_deref) {
+         handle_read(nir_src_as_deref(intrin->src[0]), state);
+         continue;
+      }
+
       if (intrin->intrinsic != nir_intrinsic_copy_deref &&
           intrin->intrinsic != nir_intrinsic_store_deref)
          continue;
@@ -222,127 +499,60 @@ opt_find_array_copies_block(nir_builder *b, nir_block *block,
       if (dst_deref->mode != nir_var_function_temp)
          continue;
 
-      /* We keep track of the SSA indices where the two last-written
-       * variables are written.  The prev_dst_var_last_write tells us when
-       * the last store_deref to something other than dst happened.  If the
-       * SSA def index from a load is greater than or equal to this number
-       * then we know it happened afterwards and no writes to anything other
-       * than dst occur between the load and the current instruction.
-       */
-      if (nir_deref_instr_get_variable(dst_deref) != dst_var) {
-         prev_dst_var_last_write = dst_var_last_write;
-         dst_var = nir_deref_instr_get_variable(dst_deref);
-      }
-      dst_var_last_write = *num_ssa_defs;
-
-      /* If it's a full variable store or copy, reset.  This will trigger
-       * eventually because we'll fail to match an array element.  However,
-       * it's a cheap early-exit.
+      /* If there are any known out-of-bounds writes, then we can just skip
+       * this write as it's undefined and won't contribute to building up an
+       * array copy anyway.
        */
-      if (dst_deref->deref_type == nir_deref_type_var)
-         goto reset;
+      if (nir_deref_instr_is_known_out_of_bounds(dst_deref))
+         continue;
 
       nir_deref_instr *src_deref;
+      unsigned load_index = 0;
       if (intrin->intrinsic == nir_intrinsic_copy_deref) {
          src_deref = nir_src_as_deref(intrin->src[1]);
+         load_index = intrin->instr.index;
       } else {
          assert(intrin->intrinsic == nir_intrinsic_store_deref);
-         src_deref = get_deref_for_load_src(intrin->src[1],
-                                            prev_dst_var_last_write);
+         nir_intrinsic_instr *load = nir_src_as_intrinsic(intrin->src[1]);
+         if (load == NULL || load->intrinsic != nir_intrinsic_load_deref) {
+            src_deref = NULL;
+         } else {
+            src_deref = nir_src_as_deref(load->src[0]);
+            load_index = load->instr.index;
+         }
 
-         /* We can only handle full writes */
          if (nir_intrinsic_write_mask(intrin) !=
-             (1 << glsl_get_components(dst_deref->type)) - 1)
-            goto reset;
+             (1 << glsl_get_components(dst_deref->type)) - 1) {
+            src_deref = NULL;
+         }
       }
 
-      /* If we didn't find a valid src, then we have an unknown store and it
-       * could mess things up.
-       */
-      if (src_deref == NULL)
-         goto reset;
-
       /* The source must be either local or something that's guaranteed to be
        * read-only.
        */
       const nir_variable_mode read_only_modes =
          nir_var_shader_in | nir_var_uniform | nir_var_system_value;
-      if (!(src_deref->mode & (nir_var_function_temp | read_only_modes)))
-         goto reset;
-
-      /* If we don't yet have an active copy, then make this instruction the
-       * active copy.
-       */
-      if (s.next_array_idx == 0) {
-         /* We can't combine a copy if there is any chance the source and
-          * destination will end up aliasing.  Just bail if they're the same
-          * variable.
-          */
-         if (nir_deref_instr_get_variable(src_deref) == dst_var)
-            goto reset;
-
-         /* The load/store pair is enough to guarantee the same bit size and
-          * number of components but a copy_var requires the actual types to
-          * match.
-          */
-         if (dst_deref->type != src_deref->type)
-            continue;
-
-         /* The first time we see a store, we don't know which array in the
-          * deref path is the one being copied so we just record the paths
-          * as-is and continue.  On the next iteration, it will try to match
-          * based on which array index changed.
-          */
-         nir_deref_path_init(&s.first_dst_path, dst_deref, mem_ctx);
-         nir_deref_path_init(&s.first_src_path, src_deref, mem_ctx);
-         s.next_array_idx = 1;
-         continue;
-      }
-
-      if (!try_match_deref(&s.first_dst_path, &s.dst_deref_array_idx,
-                           dst_deref, s.next_array_idx, mem_ctx) ||
-          !try_match_deref(&s.first_src_path, &s.src_deref_array_idx,
-                           src_deref, s.next_array_idx, mem_ctx))
-         goto reset;
-
-      if (s.next_array_idx == 1) {
-         /* This is our first non-trivial match.  We now have indices into
-          * the search paths so we can do a couple more checks.
-          */
-         assert(s.dst_deref_array_idx > 0 && s.src_deref_array_idx > 0);
-         const struct glsl_type *dst_arr_type =
-            s.first_dst_path.path[s.dst_deref_array_idx - 1]->type;
-         const struct glsl_type *src_arr_type =
-            s.first_src_path.path[s.src_deref_array_idx - 1]->type;
-
-         assert(glsl_type_is_array(dst_arr_type) ||
-                glsl_type_is_matrix(dst_arr_type));
-         assert(glsl_type_is_array(src_arr_type) ||
-                glsl_type_is_matrix(src_arr_type));
-
-         /* They must be the same length */
-         s.array_len = glsl_get_length(dst_arr_type);
-         if (s.array_len != glsl_get_length(src_arr_type))
-            goto reset;
+      if (src_deref &&
+          !(src_deref->mode & (nir_var_function_temp | read_only_modes))) {
+         src_deref = NULL;
       }
 
-      s.next_array_idx++;
-
-      if (s.next_array_idx == s.array_len) {
-         /* Hooray, We found a copy! */
-         b->cursor = nir_after_instr(instr);
-         nir_copy_deref(b, build_wildcard_deref(b, &s.first_dst_path,
-                                                s.dst_deref_array_idx),
-                           build_wildcard_deref(b, &s.first_src_path,
-                                                s.src_deref_array_idx));
-         match_state_reset(&s);
-         progress = true;
+      /* There must be no indirects in the source or destination and no known
+       * out-of-bounds accesses in the source, and the copy must be fully
+       * qualified, or else we can't build up the array copy. We handled
+       * out-of-bounds accesses to the dest above.
+       */
+      if (src_deref &&
+          (nir_deref_instr_has_indirect(src_deref) ||
+           nir_deref_instr_is_known_out_of_bounds(src_deref) ||
+           nir_deref_instr_has_indirect(dst_deref) ||
+           !glsl_type_is_vector_or_scalar(src_deref->type))) {
+         src_deref = NULL;
       }
 
-      continue;
-
-   reset:
-      match_state_reset(&s);
+      state->builder.cursor = nir_after_instr(instr);
+      progress |= handle_write(dst_deref, src_deref, instr->index,
+                               load_index, state);
    }
 
    return progress;
@@ -356,21 +566,17 @@ opt_find_array_copies_impl(nir_function_impl *impl)
 
    bool progress = false;
 
-   void *mem_ctx = ralloc_context(NULL);
-
-   /* We re-index the SSA defs as we go; it makes it easier to handle
-    * resetting the state machine.
-    */
-   unsigned num_ssa_defs = 0;
+   struct match_state s;
+   s.dead_ctx = ralloc_context(NULL);
+   s.table = _mesa_pointer_hash_table_create(s.dead_ctx);
+   nir_builder_init(&s.builder, impl);
 
    nir_foreach_block(block, impl) {
-      if (opt_find_array_copies_block(&b, block, &num_ssa_defs, mem_ctx))
+      if (opt_find_array_copies_block(&b, block, &s))
          progress = true;
    }
 
-   impl->ssa_alloc = num_ssa_defs;
-
-   ralloc_free(mem_ctx);
+   ralloc_free(s.dead_ctx);
 
    if (progress) {
       nir_metadata_preserve(impl, nir_metadata_block_index |
@@ -389,8 +595,7 @@ opt_find_array_copies_impl(nir_function_impl *impl)
  * generated by spirv_to_nir for a OpLoad of a large composite followed by
  * OpStore.
  *
- * TODO: Use a hash table approach to support out-of-order and interleaved
- * copies.
+ * TODO: Support out-of-order copies.
  */
 bool
 nir_opt_find_array_copies(nir_shader *shader)