nir: add load/store vectorizer tests
author     Rhys Perry <pendingchaos02@gmail.com>
           Mon, 2 Sep 2019 15:09:24 +0000 (16:09 +0100)
committer  Rhys Perry <pendingchaos02@gmail.com>
           Mon, 25 Nov 2019 13:59:11 +0000 (13:59 +0000)
v7: run nir_opt_algebraic
v9: rework the callback function
v9: update alignment on all loads/stores, even if they're not vectorized
v10: add tests for 64-bit offsets
v10: add tests for signed offsets

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com> (v9)
src/compiler/nir/meson.build
src/compiler/nir/tests/load_store_vectorizer_tests.cpp [new file with mode: 0644]

index 9c42c413049bc9bf67c253c8e15cd4431fea4ae6..ee197ea74fb27f792b9c6312679ee90a3e5bb5ee 100644 (file)
@@ -336,4 +336,16 @@ if with_tests
     ),
     suite : ['compiler', 'nir'],
   )
+
+  test(
+    'load_store_vectorizer',
+    executable(
+      'load_store_vectorizer',
+      files('tests/load_store_vectorizer_tests.cpp'),
+      cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+      include_directories : [inc_common],
+      dependencies : [dep_thread, idep_gtest, idep_nir, idep_mesautil],
+    ),
+    suite : ['compiler', 'nir'],
+  )
 endif
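
The new test is registered in both the 'compiler' and 'nir' suites. As a usage
sketch (assuming a standard Meson build directory, here called builddir), it
can be run by name or together with the rest of its suite:

    meson test -C builddir load_store_vectorizer
    meson test -C builddir --suite nir
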
diff --git a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
new file mode 100644 (file)
index 0000000..2251b43
--- /dev/null
@@ -0,0 +1,1751 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <gtest/gtest.h>
+
+#include "nir.h"
+#include "nir_builder.h"
+
+namespace {
+
+class nir_load_store_vectorize_test : public ::testing::Test {
+protected:
+   nir_load_store_vectorize_test();
+   ~nir_load_store_vectorize_test();
+
+   unsigned count_intrinsics(nir_intrinsic_op intrinsic);
+
+   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
+                                      unsigned index);
+
+   bool run_vectorizer(nir_variable_mode modes, bool cse=false);
+
+   nir_ssa_def *get_resource(uint32_t binding, bool ssbo);
+
+   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
+                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
+                                             unsigned access=0);
+   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
+                              uint32_t id, unsigned bit_size=32, unsigned components=1,
+                              unsigned wrmask=0xf, unsigned access=0);
+
+   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
+                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
+                                    unsigned access=0);
+   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
+                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
+                     unsigned access=0);
+
+   void create_shared_load(nir_deref_instr *deref, uint32_t id,
+                           unsigned bit_size=32, unsigned components=1);
+   void create_shared_store(nir_deref_instr *deref, uint32_t id,
+                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);
+
+   bool test_alu(nir_instr *instr, nir_op op);
+   bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);
+
+   static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
+                                      unsigned num_components, unsigned high_offset,
+                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high);
+   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);
+
+   void *mem_ctx;
+
+   nir_builder *b;
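+   /* Maps the id passed to the create_*load() helpers to the nir_alu_src of a
+    * mov reading that load's result, so tests can check which vectorized load
+    * (and which swizzle) each original load ended up using. */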
+   std::map<unsigned, nir_alu_src*> loads;
+   std::map<unsigned, nir_ssa_def*> res_map;
+};
+
+nir_load_store_vectorize_test::nir_load_store_vectorize_test()
+{
+   glsl_type_singleton_init_or_ref();
+
+   mem_ctx = ralloc_context(NULL);
+   static const nir_shader_compiler_options options = { };
+   b = rzalloc(mem_ctx, nir_builder);
+   nir_builder_init_simple_shader(b, mem_ctx, MESA_SHADER_COMPUTE, &options);
+}
+
+nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
+{
+   if (HasFailure()) {
+      printf("\nShader from the failed test:\n\n");
+      nir_print_shader(b->shader, stdout);
+   }
+
+   ralloc_free(mem_ctx);
+
+   glsl_type_singleton_decref();
+}
+
+unsigned
+nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
+{
+   unsigned count = 0;
+   nir_foreach_block(block, b->impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         if (intrin->intrinsic == intrinsic)
+            count++;
+      }
+   }
+   return count;
+}
+
+nir_intrinsic_instr *
+nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
+                                             unsigned index)
+{
+   nir_foreach_block(block, b->impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         if (intrin->intrinsic == intrinsic) {
+            if (index == 0)
+               return intrin;
+            index--;
+         }
+      }
+   }
+   return NULL;
+}
+
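+/* Runs the vectorizer on the built shader. Shared-memory variables are first
+ * lowered to explicit types. On progress, the shader is validated and then
+ * cleaned up (optional CSE, copy propagation, algebraic opts and constant
+ * folding) so the tests can match the simplified output. */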
+bool
+nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes, bool cse)
+{
+   if (modes & nir_var_mem_shared)
+      nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
+   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback);
+   if (progress) {
+      nir_validate_shader(b->shader, NULL);
+      if (cse)
+         nir_opt_cse(b->shader);
+      nir_copy_prop(b->shader);
+      nir_opt_algebraic(b->shader);
+      nir_opt_constant_folding(b->shader);
+   }
+   return progress;
+}
+
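+/* Returns (and caches in res_map) a vulkan_resource_index for the given
+ * binding, so all loads/stores of the same binding share one resource def. */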
+nir_ssa_def *
+nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
+{
+   if (res_map.count(binding))
+      return res_map[binding];
+
+   nir_intrinsic_instr *res = nir_intrinsic_instr_create(
+      b->shader, nir_intrinsic_vulkan_resource_index);
+   nir_ssa_dest_init(&res->instr, &res->dest, 1, 32, NULL);
+   res->num_components = 1;
+   res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
+   nir_intrinsic_set_desc_type(
+      res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
+   nir_intrinsic_set_desc_set(res, 0);
+   nir_intrinsic_set_binding(res, binding);
+   nir_builder_instr_insert(b, &res->instr);
+   res_map[binding] = &res->dest.ssa;
+   return &res->dest.ssa;
+}
+
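+/* Builds a UBO/SSBO/push-constant load at the given offset and records a mov
+ * of its result in loads[id] for the checks after vectorization. */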
+nir_intrinsic_instr *
+nir_load_store_vectorize_test::create_indirect_load(
+   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
+   unsigned bit_size, unsigned components, unsigned access)
+{
+   nir_intrinsic_op intrinsic;
+   nir_ssa_def *res = NULL;
+   switch (mode) {
+   case nir_var_mem_ubo:
+      intrinsic = nir_intrinsic_load_ubo;
+      res = get_resource(binding, false);
+      break;
+   case nir_var_mem_ssbo:
+      intrinsic = nir_intrinsic_load_ssbo;
+      res = get_resource(binding, true);
+      break;
+   case nir_var_mem_push_const:
+      intrinsic = nir_intrinsic_load_push_constant;
+      break;
+   default:
+      return NULL;
+   }
+   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
+   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
+   load->num_components = components;
+   if (res) {
+      load->src[0] = nir_src_for_ssa(res);
+      load->src[1] = nir_src_for_ssa(offset);
+   } else {
+      load->src[0] = nir_src_for_ssa(offset);
+   }
+   if (mode != nir_var_mem_push_const) {
+      nir_intrinsic_set_align(load, (bit_size == 1 ? 32 : bit_size) / 8, 0);
+      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
+   }
+   nir_builder_instr_insert(b, &load->instr);
+   nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr;
+   loads[id] = &nir_instr_as_alu(mov)->src[0];
+
+   return load;
+}
+
+void
+nir_load_store_vectorize_test::create_indirect_store(
+   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
+   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
+{
+   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
+   for (unsigned i = 0; i < components; i++)
+      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
+   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);
+
+   nir_intrinsic_op intrinsic;
+   nir_ssa_def *res = NULL;
+   switch (mode) {
+   case nir_var_mem_ssbo:
+      intrinsic = nir_intrinsic_store_ssbo;
+      res = get_resource(binding, true);
+      break;
+   case nir_var_mem_shared:
+      intrinsic = nir_intrinsic_store_shared;
+      break;
+   default:
+      return;
+   }
+   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
+   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
+   store->num_components = components;
+   if (res) {
+      store->src[0] = nir_src_for_ssa(value);
+      store->src[1] = nir_src_for_ssa(res);
+      store->src[2] = nir_src_for_ssa(offset);
+   } else {
+      store->src[0] = nir_src_for_ssa(value);
+      store->src[1] = nir_src_for_ssa(offset);
+   }
+   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
+   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
+   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
+   nir_builder_instr_insert(b, &store->instr);
+}
+
+nir_intrinsic_instr *
+nir_load_store_vectorize_test::create_load(
+   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
+   unsigned bit_size, unsigned components, unsigned access)
+{
+   return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
+}
+
+void
+nir_load_store_vectorize_test::create_store(
+   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
+   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
+{
+   create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
+}
+
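+/* Deref-based load/store helpers for the shared-memory tests. */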
+void nir_load_store_vectorize_test::create_shared_load(
+   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
+{
+   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
+   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
+   load->num_components = components;
+   load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+   nir_builder_instr_insert(b, &load->instr);
+   nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr;
+   loads[id] = &nir_instr_as_alu(mov)->src[0];
+}
+
+void nir_load_store_vectorize_test::create_shared_store(
+   nir_deref_instr *deref, uint32_t id,
+   unsigned bit_size, unsigned components, unsigned wrmask)
+{
+   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
+   for (unsigned i = 0; i < components; i++)
+      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
+   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);
+
+   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_deref);
+   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
+   store->num_components = components;
+   store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+   store->src[1] = nir_src_for_ssa(value);
+   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
+   nir_builder_instr_insert(b, &store->instr);
+}
+
+bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
+{
+   return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
+}
+
+bool nir_load_store_vectorize_test::test_alu_def(
+   nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
+{
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+   if (index >= nir_op_infos[alu->op].num_inputs)
+      return false;
+   if (alu->src[index].src.ssa != def)
+      return false;
+   if (alu->src[index].swizzle[0] != swizzle)
+      return false;
+
+   return true;
+}
+
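+/* The callback unconditionally allows vectorization; individual tests encode
+ * any restrictions in their expectations instead. */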
+bool nir_load_store_vectorize_test::mem_vectorize_callback(
+   unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
+   nir_intrinsic_instr *low, nir_intrinsic_instr *high)
+{
+   return true;
+}
+
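+/* glsl_type size/align callback for nir_lower_vars_to_explicit_types: scalars
+ * and vectors only, with booleans treated as 32-bit. */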
+void nir_load_store_vectorize_test::shared_type_info(
+   const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+   assert(glsl_type_is_vector_or_scalar(type));
+
+   uint32_t comp_size = glsl_type_is_boolean(type)
+      ? 4 : glsl_get_bit_size(type) / 8;
+   unsigned length = glsl_get_vector_elements(type);
+   *size = comp_size * length;
+   *align = comp_size;
+}
+} // namespace
+
+TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1);
+   create_load(nir_var_mem_ubo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
+   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 3);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+   ASSERT_EQ(loads[0x2]->swizzle[1], 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1);
+   create_load(nir_var_mem_ubo, 0, 0, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 1);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ubo_load_large)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
+   create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
+{
+   create_load(nir_var_mem_push_const, 0, 0, 0x1);
+   create_load(nir_var_mem_push_const, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
+{
+   create_load(nir_var_mem_push_const, 0, 0, 0x1);
+   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_load(nir_var_mem_ssbo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
+{
+   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
+   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+   ASSERT_EQ(load->src[1].ssa, index_base);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
+{
+   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
+   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
+   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+   ASSERT_EQ(load->src[1].ssa, index_base_prev);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
+{
+   nir_ssa_def *inv = nir_load_local_invocation_index(b);
+   nir_ssa_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
+   nir_ssa_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
+   nir_ssa_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
+   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
+   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+
+   /* nir_opt_algebraic optimizes the imul */
+   ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
+   nir_ssa_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
+   ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
+   nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
+   ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
+   ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
+   create_load(nir_var_mem_ssbo, 0, 0, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 1);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
+   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+   create_load(nir_var_mem_ssbo, 0, 0, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+/* If nir_opt_load_store_vectorize were implemented like many other load/store
+ * optimization passes (for example, nir_opt_combine_stores and
+ * nir_opt_copy_prop_vars) and stopped tracking a load once an aliasing store
+ * is encountered, this case wouldn't be optimized.
+ * A similar test for derefs is shared_load_adjacent_store_identical. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+   create_load(nir_var_mem_ssbo, 0, 4, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x3]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
+   nir_ssa_def *val = store->src[0].ssa;
+   ASSERT_EQ(val->bit_size, 32);
+   ASSERT_EQ(val->num_components, 2);
+   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
+   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
+   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
+   nir_ssa_def *val = store->src[0].ssa;
+   ASSERT_EQ(val->bit_size, 32);
+   ASSERT_EQ(val->num_components, 3);
+   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
+   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
+   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
+   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
+   nir_ssa_def *val = store->src[0].ssa;
+   ASSERT_EQ(val->bit_size, 32);
+   ASSERT_EQ(val->num_components, 1);
+   ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1);
+   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier)->instr);
+   create_load(nir_var_mem_ubo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier)->instr);
+   create_load(nir_var_mem_ssbo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+/* nir_intrinsic_barrier only synchronizes invocations within a workgroup; it
+ * doesn't require that loads/stores complete. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_barrier)->instr);
+   create_load(nir_var_mem_ssbo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier_shared)->instr);
+   create_load(nir_var_mem_ssbo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
+   create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
+   create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 8);
+   ASSERT_EQ(load->dest.ssa.num_components, 4);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+
+   nir_ssa_def *val = loads[0x3]->src.ssa;
+   ASSERT_EQ(val->bit_size, 16);
+   ASSERT_EQ(val->num_components, 1);
+   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
+   nir_ssa_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
+   nir_ssa_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
+   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
+   high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
+   ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
+   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
+   ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->dest.ssa, 2));
+   ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->dest.ssa, 3));
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 4);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
+
+   nir_ssa_def *val = loads[0x2]->src.ssa;
+   ASSERT_EQ(val->bit_size, 64);
+   ASSERT_EQ(val->num_components, 1);
+   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
+   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
+   ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa);
+   ASSERT_EQ(pack->src[0].swizzle[0], 2);
+   ASSERT_EQ(pack->src[0].swizzle[1], 3);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
+   create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 64);
+   ASSERT_EQ(load->dest.ssa.num_components, 3);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x3]->swizzle[0], 2);
+
+   /* pack_64_2x32(unpack_64_2x32()) is created because the 32-bit and first
+    * 64-bit loads are combined before the second 64-bit load is even considered. */
+   nir_ssa_def *val = loads[0x2]->src.ssa;
+   ASSERT_EQ(val->bit_size, 64);
+   ASSERT_EQ(val->num_components, 1);
+   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
+   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
+   ASSERT_TRUE(test_alu(pack->src[0].src.ssa->parent_instr, nir_op_unpack_64_2x32));
+   nir_alu_instr *unpack = nir_instr_as_alu(pack->src[0].src.ssa->parent_instr);
+   ASSERT_EQ(unpack->src[0].src.ssa, &load->dest.ssa);
+   ASSERT_EQ(unpack->src[0].swizzle[0], 1);
+
+   val = loads[0x1]->src.ssa;
+   ASSERT_EQ(val->bit_size, 32);
+   ASSERT_EQ(val->num_components, 2);
+   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
+   unpack = nir_instr_as_alu(val->parent_instr);
+   ASSERT_EQ(unpack->src[0].src.ssa, &load->dest.ssa);
+   ASSERT_EQ(unpack->src[0].swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
+{
+   create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
+   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 3);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
+
+   nir_ssa_def *val = loads[0x2]->src.ssa;
+   ASSERT_EQ(val->bit_size, 64);
+   ASSERT_EQ(val->num_components, 1);
+   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
+   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
+   ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa);
+   ASSERT_EQ(pack->src[0].swizzle[0], 1);
+   ASSERT_EQ(pack->src[0].swizzle[1], 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
+   create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
+   create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
+   nir_ssa_def *val = store->src[0].ssa;
+   ASSERT_EQ(val->bit_size, 8);
+   ASSERT_EQ(val->num_components, 4);
+   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
+   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
+   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
+   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x30);
+   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
+   nir_ssa_def *val = store->src[0].ssa;
+   ASSERT_EQ(val->bit_size, 32);
+   ASSERT_EQ(val->num_components, 4);
+   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
+   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
+   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
+   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
+   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
+   create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
+   nir_ssa_def *val = store->src[0].ssa;
+   ASSERT_EQ(val->bit_size, 64);
+   ASSERT_EQ(val->num_components, 3);
+   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
+   ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
+   ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
+   ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
+   nir_ssa_def *val = store->src[0].ssa;
+   ASSERT_EQ(val->bit_size, 32);
+   ASSERT_EQ(val->num_components, 3);
+   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
+   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
+   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
+   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
+   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
+   create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
+   nir_ssa_def *val = store->src[0].ssa;
+   ASSERT_EQ(val->bit_size, 32);
+   ASSERT_EQ(val->num_components, 4);
+   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
+   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
+   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
+   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
+   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
+{
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+
+   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
+   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+
+   deref = nir_src_as_deref(load->src[0]);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
+   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
+   ASSERT_EQ(deref->var, var);
+
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
+{
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+   nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 64, NULL);
+
+   create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
+   create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
+{
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+
+   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
+   create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+
+   deref = nir_src_as_deref(load->src[0]);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
+   ASSERT_EQ(deref->arr.index.ssa, index_base);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
+   ASSERT_EQ(deref->var, var);
+
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
+{
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);
+
+   create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
+   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+
+   deref = nir_src_as_deref(load->src[0]);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
+   ASSERT_EQ(deref->arr.index.ssa, index_base_prev);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
+   ASSERT_EQ(deref->var, var);
+
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_struct)
+{
+   glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
+                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
+
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+
+   create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
+   create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+
+   deref = nir_src_as_deref(load->src[0]);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
+   ASSERT_EQ(deref->strct.index, 0);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
+   ASSERT_EQ(deref->var, var);
+
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
+{
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+
+   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
+   create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
+   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 1);
+
+   deref = nir_src_as_deref(load->src[0]);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
+   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
+   ASSERT_EQ(deref->var, var);
+
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
+{
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+
+   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
+   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
+   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
+{
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+
+   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
+   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
+   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+
+   deref = nir_src_as_deref(load->src[0]);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
+   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
+   ASSERT_EQ(deref->var, var);
+
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x3]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_bool)
+{
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+
+   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
+   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+
+   deref = nir_src_as_deref(load->src[0]);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
+   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
+   ASSERT_EQ(deref->var, var);
+
+   ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
+   ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_i2b1));
+   ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
+   ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->dest.ssa, 1));
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
+{
+   glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
+                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
+
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+
+   create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
+   create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+
+   deref = nir_src_as_deref(load->src[0]);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
+   ASSERT_EQ(deref->strct.index, 0);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
+   ASSERT_EQ(deref->var, var);
+
+   ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
+   ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
+{
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+
+   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
+   create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+
+   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
+   nir_ssa_def *val = store->src[1].ssa;
+   ASSERT_EQ(val->bit_size, 32);
+   ASSERT_EQ(val->num_components, 2);
+   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
+   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
+   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
+
+   deref = nir_src_as_deref(store->src[0]);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
+   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
+
+   deref = nir_deref_instr_parent(deref);
+   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
+   ASSERT_EQ(deref->var, var);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
+{
+   create_load(nir_var_mem_push_const, 0, 0, 0x1);
+   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
+{
+   create_load(nir_var_mem_push_const, 0, 0, 0x1);
+   create_load(nir_var_mem_push_const, 0, 8, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
+{
+   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+   create_load(nir_var_mem_push_const, 0, 0, 0x1);
+   create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
+{
+   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+   create_indirect_load(nir_var_mem_push_const, 0,
+      nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 2)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x1);
+   create_indirect_load(nir_var_mem_push_const, 0,
+      nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 3)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
+{
+   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+   /* vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x; */
+   nir_ssa_def *low = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 12));
+   nir_ssa_def *high = nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 1)), nir_imm_int(b, 16));
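+   /* low = i*16 + 12 and high = (i + 1)*16 = i*16 + 16 are always 4 bytes
+    * apart, so the two loads should be combined into a vec2 at offset low. */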
+   create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
+   create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+   ASSERT_EQ(load->src[0].ssa, low);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
+{
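+   /* The store's offset is a run-time value, so it may overwrite the bytes
+    * read at offset 0; the two loads must not be combined across it. */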
+   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
+   create_load(nir_var_mem_ssbo, 0, 0, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
+{
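+   /* The load and store offsets are unrelated run-time values, so the
+    * vectorizer has to assume they can alias and must keep the loads
+    * separate. */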
+   nir_ssa_def *load_base = nir_load_global_invocation_index(b, 32);
+   nir_ssa_def *store_base = nir_load_local_invocation_index(b);
+   create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
+   create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
+   create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
+{
+   /* TODO: try to combine these loads */
+   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+   nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 4));
+   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 1);
+   ASSERT_EQ(load->src[1].ssa, offset);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
+{
+   /* These loads could be combined if nir_alu_instr::no_unsigned_wrap were
+    * set on the offset calculation (ssbo_alias4 covers that case). As it is,
+    * they can't be combined: if index_base == 268435455, the addition wraps
+    * around and offset == 0, so the loads would alias the store at offset 0. */
+   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+   nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
+   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
+{
+   /* TODO: try to combine these loads */
+   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+   nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
+   nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
+   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 1);
+   ASSERT_EQ(load->src[1].ssa, offset);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
+{
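+   /* Without ACCESS_RESTRICT, binding 1 may refer to the same buffer as
+    * binding 0, so the store in between keeps the loads from being
+    * combined. */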
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_store(nir_var_mem_ssbo, 1, 0, 0x2);
+   create_load(nir_var_mem_ssbo, 0, 0, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
+{
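+   /* With ACCESS_RESTRICT on every access, the store to binding 1 cannot
+    * alias binding 0, so the two identical loads are merged into one. */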
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
+   create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
+   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 1);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
+{
+   /* TODO: implement type-based alias analysis so that these loads can be
+    * combined. This is more difficult than simply using nir_compare_derefs()
+    * because the vectorizer creates loads/stores with cast derefs. The
+    * solution would probably be to keep multiple derefs for an entry (one for
+    * each load/store combined into it). */
+   glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
+                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
+
+   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+
+   nir_ssa_def *index0 = nir_load_local_invocation_index(b);
+   nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32);
+   nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);
+
+   create_shared_load(load_deref, 0x1);
+   create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
+   create_shared_load(load_deref, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 1);
+   ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_alias1)
+{
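+   /* Distinct shared variables cannot alias, so the store to var1 does not
+    * prevent the two loads of var0 from being merged. */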
+   nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
+   nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
+   nir_deref_instr *load_deref = nir_build_deref_var(b, var0);
+
+   create_shared_load(load_deref, 0x1);
+   create_shared_store(nir_build_deref_var(b, var1), 0x2);
+   create_shared_load(load_deref, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 1);
+   ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
+{
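+   /* The 64-bit offsets differ by 0x100000004 bytes, but their low 32 bits
+    * differ by only 4; a 32-bit distance calculation would wrongly make the
+    * loads look adjacent, so they must be left alone. */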
+   create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x100000000, 64), 0x1);
+   create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x200000004, 64), 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
+{
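+   /* For any index the two offsets have identical low 32 bits; only a
+    * correct 64-bit distance calculation keeps these loads from being
+    * combined. */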
+   nir_ssa_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
+   nir_ssa_def *first = nir_imul_imm(b, index_base, 0x100000000);
+   nir_ssa_def *second = nir_imul_imm(b, index_base, 0x200000000);
+   create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
+   create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}