--- /dev/null
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <map>
+
+#include <gtest/gtest.h>
+
+#include "nir.h"
+#include "nir_builder.h"
+
+namespace {
+
+/* GoogleTest fixture for nir_opt_load_store_vectorize.
+ *
+ * Each test gets a fresh compute shader built through a nir_builder, plus
+ * helpers to emit loads/stores, run the pass and inspect what is left. */
+class nir_load_store_vectorize_test : public ::testing::Test {
+protected:
+ /* Takes a reference on the GLSL type singleton and creates the ralloc
+ * context, the builder and the compute shader. */
+ nir_load_store_vectorize_test();
+ /* Prints the shader if the test failed, then frees mem_ctx and drops the
+ * GLSL type singleton reference. */
+ ~nir_load_store_vectorize_test();
+
+ /* Number of intrinsic instructions in b->impl whose opcode is `intrinsic`. */
+ unsigned count_intrinsics(nir_intrinsic_op intrinsic);
+
+ /* The index-th (0-based) intrinsic with opcode `intrinsic` in b->impl, or
+ * NULL if there are not that many. */
+ nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
+ unsigned index);
+
+ /* Runs nir_opt_load_store_vectorize for `modes` and returns its progress
+ * flag. On progress it validates the shader and runs cleanup passes;
+ * nir_opt_cse is included only when `cse` is true. */
+ bool run_vectorizer(nir_variable_mode modes, bool cse=false);
+
+ /* Returns the vulkan_resource_index value for `binding`, creating it on
+ * first use and caching it in res_map afterwards. */
+ nir_ssa_def *get_resource(uint32_t binding, bool ssbo);
+
+ /* Emits a UBO/SSBO/push-constant load at an SSA offset followed by a mov of
+ * its result; the mov's source is recorded in loads[id]. Returns NULL for
+ * any other mode. */
+ nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
+ uint32_t id, unsigned bit_size=32, unsigned components=1,
+ unsigned access=0);
+ /* Emits an SSBO/shared store at an SSA offset. Component i of the stored
+ * constant is (id << 4) | i. Other modes emit no store. */
+ void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
+ uint32_t id, unsigned bit_size=32, unsigned components=1,
+ unsigned wrmask=0xf, unsigned access=0);
+
+ /* Same as create_indirect_load, with the offset given as an immediate. */
+ nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
+ uint32_t id, unsigned bit_size=32, unsigned components=1,
+ unsigned access=0);
+ /* Same as create_indirect_store, with the offset given as an immediate. */
+ void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
+ uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
+ unsigned access=0);
+
+ /* Emits a load_deref of `deref` followed by a mov of its result; the mov's
+ * source is recorded in loads[id]. */
+ void create_shared_load(nir_deref_instr *deref, uint32_t id,
+ unsigned bit_size=32, unsigned components=1);
+ /* Emits a store_deref to `deref` of the constant (id << 4) | i per
+ * component. */
+ void create_shared_store(nir_deref_instr *deref, uint32_t id,
+ unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);
+
+ /* True if `instr` is an ALU instruction with opcode `op`. */
+ bool test_alu(nir_instr *instr, nir_op op);
+ /* True if `instr` is an ALU instruction whose source `index` reads `def`
+ * with `swizzle` as its first swizzle component. */
+ bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);
+
+ /* Callback handed to nir_opt_load_store_vectorize by run_vectorizer. */
+ static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
+ unsigned num_components, unsigned high_offset,
+ nir_intrinsic_instr *low, nir_intrinsic_instr *high);
+ /* Size/alignment callback handed to nir_lower_vars_to_explicit_types for
+ * shared variables. */
+ static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);
+
+ /* ralloc context that owns the builder; freed in the destructor. */
+ void *mem_ctx;
+
+ /* Builder used to emit every instruction of the test shader. */
+ nir_builder *b;
+ /* Test-chosen load id -> first source of the mov consuming that load. */
+ std::map<unsigned, nir_alu_src*> loads;
+ /* Binding -> cached vulkan_resource_index value (see get_resource). */
+ std::map<unsigned, nir_ssa_def*> res_map;
+};
+
+/* Builds the per-test state: a ralloc context, a builder allocated inside it
+ * and a simple compute shader created through that builder. */
+nir_load_store_vectorize_test::nir_load_store_vectorize_test()
+{
+   /* Zero-initialized compiler options shared by all fixture instances. */
+   static const nir_shader_compiler_options default_options = { };
+
+   glsl_type_singleton_init_or_ref();
+
+   mem_ctx = ralloc_context(NULL);
+   b = rzalloc(mem_ctx, nir_builder);
+   nir_builder_init_simple_shader(b, mem_ctx, MESA_SHADER_COMPUTE,
+                                  &default_options);
+}
+
+/* Dumps the shader of a failed test to stdout, then releases everything the
+ * constructor acquired. */
+nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
+{
+   const bool failed = HasFailure();
+   if (failed) {
+      printf("\nShader from the failed test:\n\n");
+      nir_print_shader(b->shader, stdout);
+   }
+
+   /* The builder lives in mem_ctx, so it must not be touched past this point. */
+   ralloc_free(mem_ctx);
+
+   glsl_type_singleton_decref();
+}
+
+/* Counts the intrinsic instructions in b->impl whose opcode equals
+ * `intrinsic`. */
+unsigned
+nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
+{
+   unsigned matches = 0;
+   nir_foreach_block(blk, b->impl) {
+      nir_foreach_instr(cur, blk) {
+         /* Only cast once the instruction is known to be an intrinsic. */
+         if (cur->type == nir_instr_type_intrinsic &&
+             nir_instr_as_intrinsic(cur)->intrinsic == intrinsic)
+            ++matches;
+      }
+   }
+   return matches;
+}
+
+/* Returns the index-th (0-based, in program order) intrinsic in b->impl with
+ * opcode `intrinsic`, or NULL when fewer than index + 1 such intrinsics exist. */
+nir_intrinsic_instr *
+nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
+                                             unsigned index)
+{
+   /* Number of matching intrinsics still to skip before returning one. */
+   unsigned remaining = index;
+
+   nir_foreach_block(blk, b->impl) {
+      nir_foreach_instr(cur, blk) {
+         if (cur->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *candidate = nir_instr_as_intrinsic(cur);
+         if (candidate->intrinsic != intrinsic)
+            continue;
+
+         if (remaining == 0)
+            return candidate;
+         remaining--;
+      }
+   }
+   return NULL;
+}
+
+/* Runs nir_opt_load_store_vectorize on the test shader for `modes`.
+ *
+ * When shared memory is among the modes, shared variables are first lowered to
+ * explicit types. If the pass makes progress the shader is validated and
+ * cleaned up (optionally with CSE when `cse` is set). Returns the progress flag
+ * of the vectorizer itself. */
+bool
+nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes, bool cse)
+{
+   nir_shader *shader = b->shader;
+
+   if (modes & nir_var_mem_shared)
+      nir_lower_vars_to_explicit_types(shader, nir_var_mem_shared, shared_type_info);
+
+   if (!nir_opt_load_store_vectorize(shader, modes, mem_vectorize_callback))
+      return false;
+
+   nir_validate_shader(shader, NULL);
+   if (cse)
+      nir_opt_cse(shader);
+   nir_copy_prop(shader);
+   nir_opt_algebraic(shader);
+   nir_opt_constant_folding(shader);
+   return true;
+}
+
+/* Returns the SSA value of a vulkan_resource_index intrinsic for `binding` in
+ * descriptor set 0, emitting the intrinsic on first use and reusing it later.
+ *
+ * NOTE(review): the cache is keyed by `binding` only, so the `ssbo` flag of the
+ * first call for a binding decides the descriptor type of every later call. */
+nir_ssa_def *
+nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
+{
+   std::map<unsigned, nir_ssa_def*>::iterator cached = res_map.find(binding);
+   if (cached != res_map.end())
+      return cached->second;
+
+   const unsigned desc_type = ssbo ? 7 /* VK_DESCRIPTOR_TYPE_STORAGE_BUFFER */
+                                   : 6 /* VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER */;
+
+   nir_intrinsic_instr *index_instr = nir_intrinsic_instr_create(
+      b->shader, nir_intrinsic_vulkan_resource_index);
+   nir_ssa_dest_init(&index_instr->instr, &index_instr->dest, 1, 32, NULL);
+   index_instr->num_components = 1;
+
+   /* The intrinsic's only source is a constant zero. */
+   nir_ssa_def *zero = nir_imm_zero(b, 1, 32);
+   index_instr->src[0] = nir_src_for_ssa(zero);
+
+   nir_intrinsic_set_desc_type(index_instr, desc_type);
+   nir_intrinsic_set_desc_set(index_instr, 0);
+   nir_intrinsic_set_binding(index_instr, binding);
+   nir_builder_instr_insert(b, &index_instr->instr);
+
+   nir_ssa_def *index = &index_instr->dest.ssa;
+   res_map[binding] = index;
+   return index;
+}
+
+/* Emits a load of `components` x `bit_size` bits at SSA offset `offset`.
+ *
+ * UBO and SSBO loads take the resource for `binding` as source 0 and the
+ * offset as source 1; push-constant loads take only the offset. A mov of the
+ * result is emitted right after the load and its source is stored in
+ * loads[id] so tests can see what the consumer reads after vectorization.
+ * Returns the load, or NULL (emitting nothing) for unsupported modes. */
+nir_intrinsic_instr *
+nir_load_store_vectorize_test::create_indirect_load(
+   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
+   unsigned bit_size, unsigned components, unsigned access)
+{
+   nir_intrinsic_op op;
+   nir_ssa_def *resource = NULL;
+
+   if (mode == nir_var_mem_ubo) {
+      op = nir_intrinsic_load_ubo;
+      resource = get_resource(binding, false);
+   } else if (mode == nir_var_mem_ssbo) {
+      op = nir_intrinsic_load_ssbo;
+      resource = get_resource(binding, true);
+   } else if (mode == nir_var_mem_push_const) {
+      op = nir_intrinsic_load_push_constant;
+   } else {
+      return NULL;
+   }
+
+   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
+   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
+   load->num_components = components;
+
+   /* The offset follows the resource source when there is one. */
+   unsigned offset_src = 0;
+   if (resource) {
+      load->src[0] = nir_src_for_ssa(resource);
+      offset_src = 1;
+   }
+   load->src[offset_src] = nir_src_for_ssa(offset);
+
+   if (mode != nir_var_mem_push_const) {
+      /* 1-bit values are given the alignment of a 32-bit value. */
+      const unsigned align_bits = bit_size == 1 ? 32 : bit_size;
+      nir_intrinsic_set_align(load, align_bits / 8, 0);
+      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
+   }
+   nir_builder_instr_insert(b, &load->instr);
+
+   nir_ssa_def *copy = nir_mov(b, &load->dest.ssa);
+   nir_alu_instr *mov = nir_instr_as_alu(copy->parent_instr);
+   loads[id] = &mov->src[0];
+
+   return load;
+}
+
+/* Emits a store of `components` x `bit_size` bits at SSA offset `offset`.
+ *
+ * The stored value is an immediate whose component i is (id << 4) | i, which
+ * lets tests recognise which original store a component came from. SSBO
+ * stores take (value, resource, offset) as sources; shared stores take
+ * (value, offset). For any other mode only the immediate is emitted.
+ *
+ * NOTE(review): nir_intrinsic_set_access is also applied to store_shared;
+ * confirm that intrinsic defines an ACCESS index before using that path. */
+void
+nir_load_store_vectorize_test::create_indirect_store(
+   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
+   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
+{
+   /* `values` is a fixed-size stack array; refuse to write past its end. */
+   assert(components <= NIR_MAX_VEC_COMPONENTS);
+
+   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
+   for (unsigned i = 0; i < components; i++)
+      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
+   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);
+
+   nir_intrinsic_op intrinsic;
+   nir_ssa_def *res = NULL;
+   switch (mode) {
+   case nir_var_mem_ssbo:
+      intrinsic = nir_intrinsic_store_ssbo;
+      res = get_resource(binding, true);
+      break;
+   case nir_var_mem_shared:
+      intrinsic = nir_intrinsic_store_shared;
+      break;
+   default:
+      return;
+   }
+
+   /* Store intrinsics produce no value, so no SSA destination is set up. */
+   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
+   store->num_components = components;
+   store->src[0] = nir_src_for_ssa(value);
+   if (res) {
+      store->src[1] = nir_src_for_ssa(res);
+      store->src[2] = nir_src_for_ssa(offset);
+   } else {
+      store->src[1] = nir_src_for_ssa(offset);
+   }
+   /* 1-bit values are given the alignment of a 32-bit value. */
+   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
+   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
+   /* Drop write-mask bits beyond the number of components stored. */
+   nir_intrinsic_set_write_mask(store, wrmask & ((1u << components) - 1));
+   nir_builder_instr_insert(b, &store->instr);
+}
+
+/* Convenience wrapper around create_indirect_load for a constant offset: the
+ * offset is materialised as a 32-bit immediate first. */
+nir_intrinsic_instr *
+nir_load_store_vectorize_test::create_load(
+   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
+   unsigned bit_size, unsigned components, unsigned access)
+{
+   nir_ssa_def *const_offset = nir_imm_int(b, offset);
+   return create_indirect_load(mode, binding, const_offset, id, bit_size,
+                               components, access);
+}
+
+/* Convenience wrapper around create_indirect_store for a constant offset: the
+ * offset is materialised as a 32-bit immediate first. */
+void
+nir_load_store_vectorize_test::create_store(
+   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
+   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
+{
+   nir_ssa_def *const_offset = nir_imm_int(b, offset);
+   create_indirect_store(mode, binding, const_offset, id, bit_size, components,
+                         wrmask, access);
+}
+
+/* Emits a load_deref of `deref` producing `components` x `bit_size` bits,
+ * followed by a mov of the result whose source is recorded in loads[id]. */
+void nir_load_store_vectorize_test::create_shared_load(
+   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
+{
+   nir_intrinsic_instr *deref_load =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
+   nir_ssa_dest_init(&deref_load->instr, &deref_load->dest, components, bit_size, NULL);
+   deref_load->num_components = components;
+   deref_load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+   nir_builder_instr_insert(b, &deref_load->instr);
+
+   nir_ssa_def *copy = nir_mov(b, &deref_load->dest.ssa);
+   nir_alu_instr *mov = nir_instr_as_alu(copy->parent_instr);
+   loads[id] = &mov->src[0];
+}
+
+/* Emits a store_deref to `deref` of `components` x `bit_size` bits.
+ *
+ * The stored value is an immediate whose component i is (id << 4) | i. The
+ * write mask is `wrmask` limited to the components actually stored. */
+void nir_load_store_vectorize_test::create_shared_store(
+   nir_deref_instr *deref, uint32_t id,
+   unsigned bit_size, unsigned components, unsigned wrmask)
+{
+   /* `values` is a fixed-size stack array; refuse to write past its end. */
+   assert(components <= NIR_MAX_VEC_COMPONENTS);
+
+   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
+   for (unsigned i = 0; i < components; i++)
+      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
+   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);
+
+   /* Store intrinsics produce no value, so no SSA destination is set up. */
+   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_deref);
+   store->num_components = components;
+   store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+   store->src[1] = nir_src_for_ssa(value);
+   nir_intrinsic_set_write_mask(store, wrmask & ((1u << components) - 1));
+   nir_builder_instr_insert(b, &store->instr);
+}
+
+/* Returns true when `instr` is an ALU instruction with opcode `op`. */
+bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
+{
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   return nir_instr_as_alu(instr)->op == op;
+}
+
+/* Returns true when `instr` is an ALU instruction that has a source number
+ * `index`, that source reads `def`, and its first swizzle component equals
+ * `swizzle`. */
+bool nir_load_store_vectorize_test::test_alu_def(
+   nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
+{
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+   /* The index check comes first so src[index] is never read out of range. */
+   return index < nir_op_infos[alu->op].num_inputs &&
+          alu->src[index].src.ssa == def &&
+          alu->src[index].swizzle[0] == swizzle;
+}
+
+/* Vectorization callback passed to nir_opt_load_store_vectorize.
+ *
+ * Accepts every candidate whose bit size is at least one byte; all other
+ * arguments are ignored. */
+bool nir_load_store_vectorize_test::mem_vectorize_callback(
+   unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
+   nir_intrinsic_instr *low, nir_intrinsic_instr *high)
+{
+   /* Same truth value as the integer bit_size / 8 converted to bool. */
+   return bit_size >= 8;
+}
+
+/* Size/alignment callback for nir_lower_vars_to_explicit_types.
+ *
+ * Only scalars and vectors are supported. Booleans occupy 4 bytes per
+ * component; every other type uses its bit size. The type is aligned to a
+ * single component and its size is the component size times the vector length.
+ *
+ * @param type   scalar or vector GLSL type to measure
+ * @param size   receives the total size in bytes
+ * @param align  receives the alignment in bytes */
+void nir_load_store_vectorize_test::shared_type_info(
+   const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+   assert(glsl_type_is_vector_or_scalar(type));
+
+   uint32_t comp_size = glsl_type_is_boolean(type)
+      ? 4 : glsl_get_bit_size(type) / 8;
+   unsigned length = glsl_get_vector_elements(type);
+   *size = comp_size * length;
+   *align = comp_size;
+}
+} // namespace
+
+/* Two scalar 32-bit UBO loads at byte offsets 0 and 4 become one vec2 load. */
+TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1);
+   create_load(nir_var_mem_ubo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ubo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 2);
+   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 0);
+
+   /* Each original consumer reads its own component of the merged load. */
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use2 = loads[0x2];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use2->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use2->swizzle[0], 1);
+}
+
+/* Two vec2 32-bit UBO loads at byte offsets 0 and 4 overlap by one component
+ * and become a single vec3 load. */
+TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
+   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ubo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 3);
+   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 0);
+
+   /* The first consumer reads components 0-1, the second components 1-2. */
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use2 = loads[0x2];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use2->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use1->swizzle[1], 1);
+   ASSERT_EQ(use2->swizzle[0], 1);
+   ASSERT_EQ(use2->swizzle[1], 2);
+}
+
+/* Two identical scalar UBO loads at byte offset 0 collapse into one scalar
+ * load that both consumers read. */
+TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1);
+   create_load(nir_var_mem_ubo, 0, 0, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ubo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 1);
+   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 0);
+
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use2 = loads[0x2];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use2->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use2->swizzle[0], 0);
+}
+
+/* A vec2 load at offset 0 followed by a vec3 load at offset 8: the pass is
+ * expected to report progress while still leaving two UBO loads behind. */
+TEST_F(nir_load_store_vectorize_test, ubo_load_large)
+{
+   const nir_intrinsic_op load_op = nir_intrinsic_load_ubo;
+
+   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
+   create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(load_op), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(load_op), 2);
+}
+
+/* Two scalar push-constant loads at byte offsets 0 and 4 become one vec2
+ * load. Push-constant loads carry their offset in source 0. */
+TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
+{
+   create_load(nir_var_mem_push_const, 0, 0, 0x1);
+   create_load(nir_var_mem_push_const, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_push_constant, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 2);
+   ASSERT_EQ(nir_src_as_uint(merged->src[0]), 0);
+
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use2 = loads[0x2];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use2->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use2->swizzle[0], 1);
+}
+
+/* Like push_const_load_adjacent, but the second load reaches byte 4 through
+ * its base index (4) with a zero offset source rather than an offset of 4. */
+TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
+{
+   create_load(nir_var_mem_push_const, 0, 0, 0x1);
+   nir_intrinsic_instr *based = create_load(nir_var_mem_push_const, 0, 0, 0x2);
+   nir_intrinsic_set_base(based, 4);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_push_constant, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 2);
+   ASSERT_EQ(nir_src_as_uint(merged->src[0]), 0);
+
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use2 = loads[0x2];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use2->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use2->swizzle[0], 1);
+}
+
+/* Two scalar 32-bit SSBO loads at byte offsets 0 and 4 become one vec2 load. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_load(nir_var_mem_ssbo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 2);
+   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 0);
+
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use2 = loads[0x2];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use2->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use2->swizzle[0], 1);
+}
+
+/* Two scalar SSBO loads at dynamic offsets `x` and `x + 4` become one vec2
+ * load whose offset source is `x` itself. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
+{
+   nir_ssa_def *base_offset = nir_load_local_invocation_index(b);
+   create_indirect_load(nir_var_mem_ssbo, 0, base_offset, 0x1);
+   nir_ssa_def *next_offset = nir_iadd_imm(b, base_offset, 4);
+   create_indirect_load(nir_var_mem_ssbo, 0, next_offset, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 2);
+   ASSERT_EQ(merged->src[1].ssa, base_offset);
+
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use2 = loads[0x2];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use2->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use2->swizzle[0], 1);
+}
+
+/* Two scalar SSBO loads at dynamic offsets `x + 0xfffffffc` and `x` become one
+ * vec2 load whose offset source is the lower of the two, `x + 0xfffffffc`. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
+{
+   nir_ssa_def *base_offset = nir_load_local_invocation_index(b);
+   nir_ssa_def *prev_offset = nir_iadd_imm(b, base_offset, 0xfffffffc);
+   create_indirect_load(nir_var_mem_ssbo, 0, prev_offset, 0x1);
+   create_indirect_load(nir_var_mem_ssbo, 0, base_offset, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 2);
+   ASSERT_EQ(merged->src[1].ssa, prev_offset);
+
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use2 = loads[0x2];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use2->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use2->swizzle[0], 1);
+}
+
+/* Two scalar SSBO loads at dynamic offsets `(i + 1) * 0xfffffffc` and
+ * `i * 0xfffffffc` become one vec2 load addressed by the first of them. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
+{
+   nir_ssa_def *invocation = nir_load_local_invocation_index(b);
+   nir_ssa_def *next_invocation = nir_iadd_imm(b, invocation, 1);
+   nir_ssa_def *base_offset = nir_imul_imm(b, invocation, 0xfffffffc);
+   nir_ssa_def *prev_offset = nir_imul_imm(b, next_invocation, 0xfffffffc);
+   create_indirect_load(nir_var_mem_ssbo, 0, prev_offset, 0x1);
+   create_indirect_load(nir_var_mem_ssbo, 0, base_offset, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 2);
+
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use2 = loads[0x2];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use2->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use2->swizzle[0], 1);
+
+   /* nir_opt_algebraic optimizes the imul: the offset is expected to be
+    * ineg(ishl(i + 1, 2)). */
+   nir_instr *neg_instr = merged->src[1].ssa->parent_instr;
+   ASSERT_TRUE(test_alu(neg_instr, nir_op_ineg));
+   nir_ssa_def *negated = nir_instr_as_alu(neg_instr)->src[0].src.ssa;
+   ASSERT_TRUE(test_alu(negated->parent_instr, nir_op_ishl));
+   nir_alu_instr *shift = nir_instr_as_alu(negated->parent_instr);
+   ASSERT_EQ(shift->src[0].src.ssa, next_invocation);
+   ASSERT_EQ(nir_src_as_uint(shift->src[1].src), 2);
+}
+
+/* Two identical scalar loads at offset 0 separated by a scalar store at
+ * offset 4: the loads are still merged into one scalar load. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
+   create_load(nir_var_mem_ssbo, 0, 0, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 1);
+   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 0);
+
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use3 = loads[0x3];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use3->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use3->swizzle[0], 0);
+}
+
+/* Two identical vec2 loads at offset 0 separated by a scalar store at offset
+ * 4, which overlaps their second component: nothing may be merged. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
+{
+   const nir_intrinsic_op load_op = nir_intrinsic_load_ssbo;
+
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
+   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(load_op), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(load_op), 2);
+}
+
+/* Two identical scalar loads at offset 0 separated by a scalar store to that
+ * same offset: nothing may be merged. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
+{
+   const nir_intrinsic_op load_op = nir_intrinsic_load_ssbo;
+
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+   create_load(nir_var_mem_ssbo, 0, 0, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(load_op), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(load_op), 2);
+}
+
+/* Many load/store optimization passes (for example nir_opt_combine_stores and
+ * nir_opt_copy_prop_vars) stop tracking a load as soon as an aliasing store is
+ * encountered. If nir_opt_load_store_vectorize were implemented that way, this
+ * case would not be optimized.
+ * shared_load_adjacent_store_identical is a similar test for derefs. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+   create_load(nir_var_mem_ssbo, 0, 4, 0x3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   /* The loads are merged; the store is left alone. */
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 2);
+   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 0);
+
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use3 = loads[0x3];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use3->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use3->swizzle[0], 1);
+}
+
+/* Two scalar 32-bit SSBO stores at byte offsets 0 and 4 become one vec2 store
+ * carrying both original constants. */
+TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+   ASSERT_EQ(nir_src_as_uint(merged->src[2]), 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(merged), 0x3);
+
+   nir_ssa_def *stored = merged->src[0].ssa;
+   ASSERT_EQ(stored->bit_size, 32);
+   ASSERT_EQ(stored->num_components, 2);
+
+   /* Store id N writes (N << 4) | component, so 0x10 and 0x20 here. */
+   nir_const_value *consts = nir_instr_as_load_const(stored->parent_instr)->value;
+   ASSERT_EQ(nir_const_value_as_uint(consts[0], 32), 0x10);
+   ASSERT_EQ(nir_const_value_as_uint(consts[1], 32), 0x20);
+}
+
+/* Two vec2 SSBO stores at byte offsets 0 and 4 overlap by one component and
+ * become one vec3 store; the overlapping component holds the later store's
+ * value. */
+TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+   ASSERT_EQ(nir_src_as_uint(merged->src[2]), 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(merged), 0x7);
+
+   nir_ssa_def *stored = merged->src[0].ssa;
+   ASSERT_EQ(stored->bit_size, 32);
+   ASSERT_EQ(stored->num_components, 3);
+
+   nir_const_value *consts = nir_instr_as_load_const(stored->parent_instr)->value;
+   ASSERT_EQ(nir_const_value_as_uint(consts[0], 32), 0x10);
+   ASSERT_EQ(nir_const_value_as_uint(consts[1], 32), 0x20);
+   ASSERT_EQ(nir_const_value_as_uint(consts[2], 32), 0x21);
+}
+
+/* Two scalar SSBO stores to the same offset collapse into one scalar store
+ * that writes the later store's value (0x20). */
+TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
+{
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
+   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+   ASSERT_EQ(nir_src_as_uint(merged->src[2]), 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(merged), 0x1);
+
+   nir_ssa_def *stored = merged->src[0].ssa;
+   ASSERT_EQ(stored->bit_size, 32);
+   ASSERT_EQ(stored->num_components, 1);
+   ASSERT_EQ(nir_src_as_uint(merged->src[0]), 0x20);
+}
+
+/* A vec2 store at offset 0 followed by a vec3 store at offset 8: the pass is
+ * expected to report progress while still leaving two SSBO stores behind. */
+TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
+{
+   const nir_intrinsic_op store_op = nir_intrinsic_store_ssbo;
+
+   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(store_op), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(store_op), 2);
+}
+
+/* A memory_barrier between two adjacent UBO loads is expected not to stop
+ * them from being merged. */
+TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1);
+   nir_intrinsic_instr *barrier =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier);
+   nir_builder_instr_insert(b, &barrier->instr);
+   create_load(nir_var_mem_ubo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+}
+
+/* A memory_barrier between two adjacent SSBO loads is expected to keep them
+ * from being merged. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   nir_intrinsic_instr *barrier =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier);
+   nir_builder_instr_insert(b, &barrier->instr);
+   create_load(nir_var_mem_ssbo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+/* nir_intrinsic_barrier only synchronizes the invocations of a workgroup; it
+ * does not require loads/stores to complete, so the two loads around it are
+ * still expected to be merged. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   nir_intrinsic_instr *barrier =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_barrier);
+   nir_builder_instr_insert(b, &barrier->instr);
+   create_load(nir_var_mem_ssbo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+}
+
+/* A memory_barrier_shared between two adjacent SSBO loads is expected not to
+ * stop them from being merged. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+   nir_intrinsic_instr *barrier =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier_shared);
+   nir_builder_instr_insert(b, &barrier->instr);
+   create_load(nir_var_mem_ssbo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+}
+
+/* Two 8-bit loads (offsets 0 and 1) and one 16-bit load (offset 2) become a
+ * single 4 x 8-bit load; the 16-bit value is rebuilt from components 2 and 3. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
+   create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
+   create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 8);
+   ASSERT_EQ(merged_def->num_components, 4);
+   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 0);
+
+   nir_alu_src *use1 = loads[0x1];
+   nir_alu_src *use2 = loads[0x2];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use2->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use2->swizzle[0], 1);
+
+   /* Expected shape: ior(u2u16(merged.z), ishl(u2u16(merged.w), ...)). */
+   nir_ssa_def *wide = loads[0x3]->src.ssa;
+   ASSERT_EQ(wide->bit_size, 16);
+   ASSERT_EQ(wide->num_components, 1);
+   ASSERT_TRUE(test_alu(wide->parent_instr, nir_op_ior));
+
+   nir_alu_instr *combine = nir_instr_as_alu(wide->parent_instr);
+   nir_ssa_def *low_half = combine->src[0].src.ssa;
+   nir_ssa_def *shifted_high = combine->src[1].src.ssa;
+   ASSERT_TRUE(test_alu(shifted_high->parent_instr, nir_op_ishl));
+
+   nir_ssa_def *high_half = nir_instr_as_alu(shifted_high->parent_instr)->src[0].src.ssa;
+   ASSERT_TRUE(test_alu(low_half->parent_instr, nir_op_u2u16));
+   ASSERT_TRUE(test_alu(high_half->parent_instr, nir_op_u2u16));
+   ASSERT_TRUE(test_alu_def(low_half->parent_instr, 0, merged_def, 2));
+   ASSERT_TRUE(test_alu_def(high_half->parent_instr, 0, merged_def, 3));
+}
+
+/* A vec2 32-bit load (offset 0) and a scalar 64-bit load (offset 8) become a
+ * single 4 x 32-bit load; the 64-bit value is packed from components 2 and 3. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 32);
+   ASSERT_EQ(merged_def->num_components, 4);
+   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 0);
+
+   nir_alu_src *use1 = loads[0x1];
+   ASSERT_EQ(use1->src.ssa, merged_def);
+   ASSERT_EQ(use1->swizzle[0], 0);
+   ASSERT_EQ(use1->swizzle[1], 1);
+
+   nir_ssa_def *wide = loads[0x2]->src.ssa;
+   ASSERT_EQ(wide->bit_size, 64);
+   ASSERT_EQ(wide->num_components, 1);
+   ASSERT_TRUE(test_alu(wide->parent_instr, nir_op_pack_64_2x32));
+
+   nir_alu_instr *pack64 = nir_instr_as_alu(wide->parent_instr);
+   ASSERT_EQ(pack64->src[0].src.ssa, merged_def);
+   ASSERT_EQ(pack64->src[0].swizzle[0], 2);
+   ASSERT_EQ(pack64->src[0].swizzle[1], 3);
+}
+
+/* A vec2 32-bit load (offset 0) and two scalar 64-bit loads (offsets 8 and
+ * 16) become a single 3 x 64-bit load. The vectorizer is run with CSE. */
+TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
+{
+   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
+   create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+   nir_ssa_def *merged_def = &merged->dest.ssa;
+   ASSERT_EQ(merged_def->bit_size, 64);
+   ASSERT_EQ(merged_def->num_components, 3);
+   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 0);
+
+   /* The last 64-bit load reads component 2 directly. */
+   nir_alu_src *use3 = loads[0x3];
+   ASSERT_EQ(use3->src.ssa, merged_def);
+   ASSERT_EQ(use3->swizzle[0], 2);
+
+   /* pack_64_2x32(unpack_64_2x32()) is created because the 32-bit and first
+    * 64-bit loads are combined before the second 64-bit load is even
+    * considered. */
+   nir_ssa_def *middle = loads[0x2]->src.ssa;
+   ASSERT_EQ(middle->bit_size, 64);
+   ASSERT_EQ(middle->num_components, 1);
+   ASSERT_TRUE(test_alu(middle->parent_instr, nir_op_pack_64_2x32));
+   nir_alu_instr *pack64 = nir_instr_as_alu(middle->parent_instr);
+   nir_instr *pack_input = pack64->src[0].src.ssa->parent_instr;
+   ASSERT_TRUE(test_alu(pack_input, nir_op_unpack_64_2x32));
+   nir_alu_instr *middle_unpack = nir_instr_as_alu(pack_input);
+   ASSERT_EQ(middle_unpack->src[0].src.ssa, merged_def);
+   ASSERT_EQ(middle_unpack->src[0].swizzle[0], 1);
+
+   /* The original vec2 32-bit value is component 0 unpacked into 2 x 32. */
+   nir_ssa_def *narrow = loads[0x1]->src.ssa;
+   ASSERT_EQ(narrow->bit_size, 32);
+   ASSERT_EQ(narrow->num_components, 2);
+   ASSERT_TRUE(test_alu(narrow->parent_instr, nir_op_unpack_64_2x32));
+   nir_alu_instr *narrow_unpack = nir_instr_as_alu(narrow->parent_instr);
+   ASSERT_EQ(narrow_unpack->src[0].src.ssa, merged_def);
+   ASSERT_EQ(narrow_unpack->src[0].swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
+{
+ /* A 2x32-bit load at offset 4 and a 64-bit load at offset 8; the two
+  * ranges intersect in the second 32-bit component. */
+ create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
+ create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+ /* Expected result: one 32-bit vec3 load starting at offset 4. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 3);
+ ASSERT_EQ(nir_src_as_uint(merged->src[1]), 4);
+
+ /* The 2x32-bit user reads components 0 and 1 directly. */
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x1]->swizzle[1], 1);
+
+ /* The 64-bit user is rebuilt by packing components 1 and 2. */
+ nir_ssa_def *wide = loads[0x2]->src.ssa;
+ ASSERT_EQ(wide->bit_size, 64);
+ ASSERT_EQ(wide->num_components, 1);
+ ASSERT_TRUE(test_alu(wide->parent_instr, nir_op_pack_64_2x32));
+ nir_alu_instr *pack_alu = nir_instr_as_alu(wide->parent_instr);
+ ASSERT_EQ(pack_alu->src[0].src.ssa, merged_def);
+ ASSERT_EQ(pack_alu->src[0].swizzle[0], 1);
+ ASSERT_EQ(pack_alu->src[0].swizzle[1], 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
+{
+ /* Two 8-bit stores at offsets 0 and 1 followed by a 16-bit store at
+  * offset 2, all to SSBO binding 0. */
+ create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
+ create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
+ create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+ /* Expected result: a single store at offset 0 writing all four components
+  * of an 8-bit vec4. */
+ nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+ ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
+ ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
+ nir_ssa_def *val = store->src[0].ssa;
+ ASSERT_EQ(val->bit_size, 8);
+ ASSERT_EQ(val->num_components, 4);
+
+ /* The constant was just asserted to be 8 bits wide, so read each component
+  * back at 8 bits. Reading it at 32 bits would pick a different member of
+  * the nir_const_value union and make the result depend on the remaining
+  * bytes and on host endianness. */
+ nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
+ ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
+ ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
+ ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
+ ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
+{
+ /* A 2x32-bit store at offset 0 immediately followed by a 64-bit store at
+  * offset 8. */
+ create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+ create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+ /* Expected result: a single store at offset 0 writing all four components
+  * of a 32-bit vec4. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+ ASSERT_EQ(nir_src_as_uint(merged->src[2]), 0);
+ ASSERT_EQ(nir_intrinsic_write_mask(merged), 0xf);
+ nir_ssa_def *data = merged->src[0].ssa;
+ ASSERT_EQ(data->bit_size, 32);
+ ASSERT_EQ(data->num_components, 4);
+
+ /* The stored constant holds the two 32-bit values first; the 64-bit value
+  * is expected as 0x20 followed by 0x0. */
+ nir_const_value *consts = nir_instr_as_load_const(data->parent_instr)->value;
+ ASSERT_EQ(nir_const_value_as_uint(consts[0], 32), 0x10);
+ ASSERT_EQ(nir_const_value_as_uint(consts[1], 32), 0x11);
+ ASSERT_EQ(nir_const_value_as_uint(consts[2], 32), 0x20);
+ ASSERT_EQ(nir_const_value_as_uint(consts[3], 32), 0x0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
+{
+ /* A 2x32-bit store at offset 0 followed by 64-bit stores at offsets 8
+  * and 16. */
+ create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+ create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
+ create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+ /* Expected result: one store at offset 0 of a 64-bit vec3 with all three
+  * components enabled in the write mask. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+ ASSERT_EQ(nir_src_as_uint(merged->src[2]), 0);
+ ASSERT_EQ(nir_intrinsic_write_mask(merged), 0x7);
+ nir_ssa_def *data = merged->src[0].ssa;
+ ASSERT_EQ(data->bit_size, 64);
+ ASSERT_EQ(data->num_components, 3);
+
+ /* Component 0 carries both 32-bit values fused into one 64-bit constant
+  * (0x10 in the low half, 0x11 in the high half). */
+ nir_const_value *consts = nir_instr_as_load_const(data->parent_instr)->value;
+ ASSERT_EQ(nir_const_value_as_uint(consts[0], 64), 0x1100000010ull);
+ ASSERT_EQ(nir_const_value_as_uint(consts[1], 64), 0x20);
+ ASSERT_EQ(nir_const_value_as_uint(consts[2], 64), 0x30);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
+{
+ /* A 2x32-bit store at offset 0 and a later 64-bit store at offset 4 that
+  * overlaps its second component. */
+ create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
+ create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+ /* Expected result: one store at offset 0 of a 32-bit vec3. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+ ASSERT_EQ(nir_src_as_uint(merged->src[2]), 0);
+ ASSERT_EQ(nir_intrinsic_write_mask(merged), 0x7);
+ nir_ssa_def *data = merged->src[0].ssa;
+ ASSERT_EQ(data->bit_size, 32);
+ ASSERT_EQ(data->num_components, 3);
+
+ /* Only the first 32-bit value survives; the later 64-bit store supplies
+  * components 1 and 2. */
+ nir_const_value *consts = nir_instr_as_load_const(data->parent_instr)->value;
+ ASSERT_EQ(nir_const_value_as_uint(consts[0], 32), 0x10);
+ ASSERT_EQ(nir_const_value_as_uint(consts[1], 32), 0x20);
+ ASSERT_EQ(nir_const_value_as_uint(consts[2], 32), 0x0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
+{
+ /* A single 32-bit store at offset 0 followed by a 2x64-bit store at
+  * offset 4. The test expects the pass to report no progress and to leave
+  * both stores in place. */
+ create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
+ create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+ const bool progress = run_vectorizer(nir_var_mem_ssbo);
+ EXPECT_FALSE(progress);
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
+{
+ /* Two vec4 stores to the same offset with partial write masks: the first
+  * writes components 0 and 2, the second writes components 1, 2 and 3. */
+ create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
+ create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
+
+ /* Expected result: one store at offset 0 whose mask is the union (0xf). */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_store_ssbo, 0);
+ ASSERT_EQ(nir_src_as_uint(merged->src[2]), 0);
+ ASSERT_EQ(nir_intrinsic_write_mask(merged), 0xf);
+ nir_ssa_def *data = merged->src[0].ssa;
+ ASSERT_EQ(data->bit_size, 32);
+ ASSERT_EQ(data->num_components, 4);
+
+ /* Component 0 comes from the first store; components 1-3 come from the
+  * second, which also wins the doubly-written component 2. */
+ nir_const_value *consts = nir_instr_as_load_const(data->parent_instr)->value;
+ ASSERT_EQ(nir_const_value_as_uint(consts[0], 32), 0x10);
+ ASSERT_EQ(nir_const_value_as_uint(consts[1], 32), 0x21);
+ ASSERT_EQ(nir_const_value_as_uint(consts[2], 32), 0x22);
+ ASSERT_EQ(nir_const_value_as_uint(consts[3], 32), 0x23);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
+{
+ /* Shared uint[4] variable; load elements 0 and 1 through array derefs. */
+ const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), 4, 0);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, array_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, 0), 0x1);
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, 1), 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+ /* Expected result: a single 32-bit vec2 load. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_deref, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 2);
+
+ /* Its deref chain must be cast -> array[0] -> var. */
+ nir_deref_instr *cast = nir_src_as_deref(merged->src[0]);
+ ASSERT_EQ(cast->deref_type, nir_deref_type_cast);
+
+ nir_deref_instr *elem = nir_deref_instr_parent(cast);
+ ASSERT_EQ(elem->deref_type, nir_deref_type_array);
+ ASSERT_EQ(nir_src_as_uint(elem->arr.index), 0);
+
+ nir_deref_instr *root = nir_deref_instr_parent(elem);
+ ASSERT_EQ(root->deref_type, nir_deref_type_var);
+ ASSERT_EQ(root->var, var);
+
+ /* Each original user selects its own component of the merged load. */
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x2]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
+{
+ const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), 4, 0);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, array_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+ /* Re-initialise the variable deref's destination as a single 64-bit
+  * component; presumably this is what lets the array indices below be
+  * 64-bit values. */
+ nir_ssa_dest_init(&var_deref->instr, &var_deref->dest, 1, 64, NULL);
+
+ /* The two indices differ by 0x100000001: their low 32 bits are adjacent
+  * (0 and 1) but the full 64-bit values are far apart. */
+ const int64_t first_index = 0x100000000;
+ const int64_t second_index = 0x200000001;
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, first_index), 0x1);
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, second_index), 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+ /* The loads must not be merged. */
+ EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
+{
+ /* Shared uint[4] variable indexed by a dynamic value: load var[i] and
+  * var[i + 1] where i is the local invocation index. */
+ const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), 4, 0);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, array_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+ nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+
+ create_shared_load(nir_build_deref_array(b, var_deref, index_base), 0x1);
+ nir_ssa_def *index_next = nir_iadd_imm(b, index_base, 1);
+ create_shared_load(nir_build_deref_array(b, var_deref, index_next), 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+ /* Expected result: a single 32-bit vec2 load. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_deref, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 2);
+
+ /* Its deref chain must be cast -> array[index_base] -> var. */
+ nir_deref_instr *cast = nir_src_as_deref(merged->src[0]);
+ ASSERT_EQ(cast->deref_type, nir_deref_type_cast);
+
+ nir_deref_instr *elem = nir_deref_instr_parent(cast);
+ ASSERT_EQ(elem->deref_type, nir_deref_type_array);
+ ASSERT_EQ(elem->arr.index.ssa, index_base);
+
+ nir_deref_instr *root = nir_deref_instr_parent(elem);
+ ASSERT_EQ(root->deref_type, nir_deref_type_var);
+ ASSERT_EQ(root->var, var);
+
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x2]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
+{
+ /* Like shared_load_adjacent_indirect, but the lower index is formed by
+  * adding 0xffffffff (i.e. -1 in 32 bits) to the local invocation index. */
+ const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), 4, 0);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, array_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+ nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+ nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);
+
+ create_shared_load(nir_build_deref_array(b, var_deref, index_base_prev), 0x1);
+ create_shared_load(nir_build_deref_array(b, var_deref, index_base), 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+ /* Expected result: a single 32-bit vec2 load. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_deref, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 2);
+
+ /* The merged load must be based on the lower index (index_base_prev). */
+ nir_deref_instr *cast = nir_src_as_deref(merged->src[0]);
+ ASSERT_EQ(cast->deref_type, nir_deref_type_cast);
+
+ nir_deref_instr *elem = nir_deref_instr_parent(cast);
+ ASSERT_EQ(elem->deref_type, nir_deref_type_array);
+ ASSERT_EQ(elem->arr.index.ssa, index_base_prev);
+
+ nir_deref_instr *root = nir_deref_instr_parent(elem);
+ ASSERT_EQ(root->deref_type, nir_deref_type_var);
+ ASSERT_EQ(root->var, var);
+
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x2]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_struct)
+{
+ /* struct { uint field0; uint field1[4]; } in shared memory; load field0
+  * and field1[0]. */
+ glsl_struct_field fields[2] = {
+  glsl_struct_field(glsl_uint_type(), "field0"),
+  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
+
+ const struct glsl_type *struct_type = glsl_struct_type(fields, 2, "Struct", false);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, struct_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+
+ create_shared_load(nir_build_deref_struct(b, var_deref, 0), 0x1);
+ nir_deref_instr *field1 = nir_build_deref_struct(b, var_deref, 1);
+ create_shared_load(nir_build_deref_array_imm(b, field1, 0), 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+ /* Expected result: a single 32-bit vec2 load. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_deref, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 2);
+
+ /* Its deref chain must be cast -> struct member 0 -> var. */
+ nir_deref_instr *cast = nir_src_as_deref(merged->src[0]);
+ ASSERT_EQ(cast->deref_type, nir_deref_type_cast);
+
+ nir_deref_instr *member = nir_deref_instr_parent(cast);
+ ASSERT_EQ(member->deref_type, nir_deref_type_struct);
+ ASSERT_EQ(member->strct.index, 0);
+
+ nir_deref_instr *root = nir_deref_instr_parent(member);
+ ASSERT_EQ(root->deref_type, nir_deref_type_var);
+ ASSERT_EQ(root->var, var);
+
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x2]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
+{
+ /* load var[0]; store var[1]; load var[0]. The store touches a different
+  * element, so the two identical loads are expected to be merged. */
+ const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), 4, 0);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, array_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, 0), 0x1);
+ create_shared_store(nir_build_deref_array_imm(b, var_deref, 1), 0x2);
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, 0), 0x3);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+
+ /* The surviving load is still a scalar 32-bit load of var[0]; no cast
+  * deref is expected in its chain. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_deref, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 1);
+
+ nir_deref_instr *elem = nir_src_as_deref(merged->src[0]);
+ ASSERT_EQ(elem->deref_type, nir_deref_type_array);
+ ASSERT_EQ(nir_src_as_uint(elem->arr.index), 0);
+
+ nir_deref_instr *root = nir_deref_instr_parent(elem);
+ ASSERT_EQ(root->deref_type, nir_deref_type_var);
+ ASSERT_EQ(root->var, var);
+
+ /* Both original users read component 0 of the one remaining load. */
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x3]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
+{
+ /* load var[0]; store var[0]; load var[0]. The intervening store writes
+  * the very element being loaded, so the loads must stay separate. */
+ const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), 4, 0);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, array_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, 0), 0x1);
+ create_shared_store(nir_build_deref_array_imm(b, var_deref, 0), 0x2);
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, 0), 0x3);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+ const bool progress = run_vectorizer(nir_var_mem_shared);
+ EXPECT_FALSE(progress);
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
+{
+ /* load var[0]; store var[0]; load var[1]. The test expects the two
+  * adjacent loads to be merged despite the store in between. */
+ const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), 4, 0);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, array_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, 0), 0x1);
+ create_shared_store(nir_build_deref_array_imm(b, var_deref, 0), 0x2);
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, 1), 0x3);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+
+ /* Expected result: a single 32-bit vec2 load. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_deref, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 2);
+
+ /* Its deref chain must be cast -> array[0] -> var. */
+ nir_deref_instr *cast = nir_src_as_deref(merged->src[0]);
+ ASSERT_EQ(cast->deref_type, nir_deref_type_cast);
+
+ nir_deref_instr *elem = nir_deref_instr_parent(cast);
+ ASSERT_EQ(elem->deref_type, nir_deref_type_array);
+ ASSERT_EQ(nir_src_as_uint(elem->arr.index), 0);
+
+ nir_deref_instr *root = nir_deref_instr_parent(elem);
+ ASSERT_EQ(root->deref_type, nir_deref_type_var);
+ ASSERT_EQ(root->var, var);
+
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x3]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x3]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_bool)
+{
+ /* Shared bool[4] variable; load elements 0 and 1. The trailing 1 passed
+  * to create_shared_load() is presumably the bit size of the loaded value
+  * (the helper is defined outside this chunk). */
+ const struct glsl_type *array_type = glsl_array_type(glsl_bool_type(), 4, 0);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, array_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, 0), 0x1, 1);
+ create_shared_load(nir_build_deref_array_imm(b, var_deref, 1), 0x2, 1);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+ /* The merged load is expected to be a 32-bit vec2. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_deref, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 2);
+
+ /* Its deref chain must be cast -> array[0] -> var. */
+ nir_deref_instr *cast = nir_src_as_deref(merged->src[0]);
+ ASSERT_EQ(cast->deref_type, nir_deref_type_cast);
+
+ nir_deref_instr *elem = nir_deref_instr_parent(cast);
+ ASSERT_EQ(elem->deref_type, nir_deref_type_array);
+ ASSERT_EQ(nir_src_as_uint(elem->arr.index), 0);
+
+ nir_deref_instr *root = nir_deref_instr_parent(elem);
+ ASSERT_EQ(root->deref_type, nir_deref_type_var);
+ ASSERT_EQ(root->var, var);
+
+ /* Each boolean user must now be fed by an i2b1 whose source 0 is the
+  * merged load (last argument of test_alu_def() is presumably the expected
+  * component). */
+ nir_instr *first_conv = loads[0x1]->src.ssa->parent_instr;
+ nir_instr *second_conv = loads[0x2]->src.ssa->parent_instr;
+ ASSERT_TRUE(test_alu(first_conv, nir_op_i2b1));
+ ASSERT_TRUE(test_alu(second_conv, nir_op_i2b1));
+ ASSERT_TRUE(test_alu_def(first_conv, 0, merged_def, 0));
+ ASSERT_TRUE(test_alu_def(second_conv, 0, merged_def, 1));
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
+{
+ /* struct { bool field0; uint field1[4]; } in shared memory; load the bool
+  * member and field1[0]. */
+ glsl_struct_field fields[2] = {
+  glsl_struct_field(glsl_bool_type(), "field0"),
+  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
+
+ const struct glsl_type *struct_type = glsl_struct_type(fields, 2, "Struct", false);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, struct_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+
+ create_shared_load(nir_build_deref_struct(b, var_deref, 0), 0x1, 1);
+ nir_deref_instr *field1 = nir_build_deref_struct(b, var_deref, 1);
+ create_shared_load(nir_build_deref_array_imm(b, field1, 0), 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+ /* Expected result: a single 32-bit vec2 load. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_deref, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 2);
+
+ /* Its deref chain must be cast -> struct member 0 -> var. */
+ nir_deref_instr *cast = nir_src_as_deref(merged->src[0]);
+ ASSERT_EQ(cast->deref_type, nir_deref_type_cast);
+
+ nir_deref_instr *member = nir_deref_instr_parent(cast);
+ ASSERT_EQ(member->deref_type, nir_deref_type_struct);
+ ASSERT_EQ(member->strct.index, 0);
+
+ nir_deref_instr *root = nir_deref_instr_parent(member);
+ ASSERT_EQ(root->deref_type, nir_deref_type_var);
+ ASSERT_EQ(root->var, var);
+
+ /* The bool user goes through an i2b1 of the merged load; the uint user
+  * reads component 1 directly. */
+ nir_instr *bool_conv = loads[0x1]->src.ssa->parent_instr;
+ ASSERT_TRUE(test_alu(bool_conv, nir_op_i2b1));
+ ASSERT_TRUE(test_alu_def(bool_conv, 0, merged_def, 0));
+ ASSERT_EQ(loads[0x2]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
+{
+ /* Shared uint[4] variable; store to elements 0 and 1. */
+ const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), 4, 0);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, array_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+
+ create_shared_store(nir_build_deref_array_imm(b, var_deref, 0), 0x1);
+ create_shared_store(nir_build_deref_array_imm(b, var_deref, 1), 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+
+ /* Expected result: one store of a 32-bit vec2 constant with both
+  * components enabled. For store_deref the value is source 1. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_store_deref, 0);
+ ASSERT_EQ(nir_intrinsic_write_mask(merged), 0x3);
+ nir_ssa_def *data = merged->src[1].ssa;
+ ASSERT_EQ(data->bit_size, 32);
+ ASSERT_EQ(data->num_components, 2);
+ nir_const_value *consts = nir_instr_as_load_const(data->parent_instr)->value;
+ ASSERT_EQ(nir_const_value_as_uint(consts[0], 32), 0x10);
+ ASSERT_EQ(nir_const_value_as_uint(consts[1], 32), 0x20);
+
+ /* The destination deref chain must be cast -> array[0] -> var. */
+ nir_deref_instr *cast = nir_src_as_deref(merged->src[0]);
+ ASSERT_EQ(cast->deref_type, nir_deref_type_cast);
+
+ nir_deref_instr *elem = nir_deref_instr_parent(cast);
+ ASSERT_EQ(elem->deref_type, nir_deref_type_array);
+ ASSERT_EQ(nir_src_as_uint(elem->arr.index), 0);
+
+ nir_deref_instr *root = nir_deref_instr_parent(elem);
+ ASSERT_EQ(root->deref_type, nir_deref_type_var);
+ ASSERT_EQ(root->var, var);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
+{
+ /* Two push-constant loads at offsets 0 and 4, but the second one has its
+  * intrinsic base index changed to 4. The test expects the differing base
+  * to keep them from being merged. */
+ create_load(nir_var_mem_push_const, 0, 0, 0x1);
+ nir_intrinsic_instr *rebased = create_load(nir_var_mem_push_const, 0, 4, 0x2);
+ nir_intrinsic_set_base(rebased, 4);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+ const bool progress = run_vectorizer(nir_var_mem_push_const);
+ EXPECT_FALSE(progress);
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
+{
+ /* Two push-constant loads at constant offsets 0 and 8. With the default
+  * load size they are not contiguous, so no merge is expected. */
+ create_load(nir_var_mem_push_const, 0, 0, 0x1);
+ create_load(nir_var_mem_push_const, 0, 8, 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+ const bool progress = run_vectorizer(nir_var_mem_push_const);
+ EXPECT_FALSE(progress);
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
+{
+ /* One push-constant load at constant offset 0 and one whose offset is the
+  * local invocation index; no merge is expected. */
+ nir_ssa_def *dynamic_offset = nir_load_local_invocation_index(b);
+ create_load(nir_var_mem_push_const, 0, 0, 0x1);
+ create_indirect_load(nir_var_mem_push_const, 0, dynamic_offset, 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+ const bool progress = run_vectorizer(nir_var_mem_push_const);
+ EXPECT_FALSE(progress);
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
+{
+ /* Two indirect push-constant loads at (index + 2) * 16 + 32 and
+  * (index + 3) * 16 + 32, i.e. 16 apart; no merge is expected.
+  * Each offset is kept as one nested builder expression so the order in
+  * which the ALU instructions are emitted is left exactly as it was. */
+ nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+
+ nir_ssa_def *first_offset =
+  nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 2)), nir_imm_int(b, 16)), nir_imm_int(b, 32));
+ create_indirect_load(nir_var_mem_push_const, 0, first_offset, 0x1);
+
+ nir_ssa_def *second_offset =
+  nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 3)), nir_imm_int(b, 16)), nir_imm_int(b, 32));
+ create_indirect_load(nir_var_mem_push_const, 0, second_offset, 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+ EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
+{
+ /* Models: vec4 pc[]; pc[gl_LocalInvocationIndex].w;
+  *         pc[gl_LocalInvocationIndex+1].x;
+  * The two offsets are index*16 + 12 and (index+1)*16, which are adjacent
+  * even though the expressions have different shapes. */
+ nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+ nir_ssa_def *low = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 12));
+ nir_ssa_def *high = nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 1)), nir_imm_int(b, 16));
+ create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
+ create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
+
+ /* Expected result: a 32-bit vec2 load that uses the lower offset. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_push_constant, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 2);
+ ASSERT_EQ(merged->src[0].ssa, low);
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x2]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
+{
+ /* load [0]; store [dynamic offset]; load [0], all on binding 0. The
+  * store's offset is unknown, so it may alias the loads and they must not
+  * be merged. */
+ nir_ssa_def *dynamic_offset = nir_load_local_invocation_index(b);
+ create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+ create_indirect_store(nir_var_mem_ssbo, 0, dynamic_offset, 0x2);
+ create_load(nir_var_mem_ssbo, 0, 0, 0x3);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+ const bool progress = run_vectorizer(nir_var_mem_ssbo);
+ EXPECT_FALSE(progress);
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
+{
+ /* Both loads use the global invocation index as offset, while the store
+  * in between uses the local invocation index. The two offsets are
+  * unrelated values, so the store may alias and no merge is expected. */
+ nir_ssa_def *load_offset = nir_load_global_invocation_index(b, 32);
+ nir_ssa_def *store_offset = nir_load_local_invocation_index(b);
+ create_indirect_load(nir_var_mem_ssbo, 0, load_offset, 0x1);
+ create_indirect_store(nir_var_mem_ssbo, 0, store_offset, 0x2);
+ create_indirect_load(nir_var_mem_ssbo, 0, load_offset, 0x3);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+ const bool progress = run_vectorizer(nir_var_mem_ssbo);
+ EXPECT_FALSE(progress);
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
+{
+ /* TODO: try to combine these loads.
+  * Both loads read offset index*16 + 4 and the store in between writes
+  * offset 0. The test is disabled (DISABLED_ prefix) until the pass can
+  * handle this case. */
+ nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+ nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 4));
+ create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
+ create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+ create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+ /* The surviving scalar load keeps the original offset and feeds both
+  * users. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 1);
+ ASSERT_EQ(merged->src[1].ssa, offset);
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x3]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
+{
+ /* Both loads read offset index_base*16 + 16 and the store in between
+  * writes offset 0. These loads can't be combined here: if
+  * index_base == 268435455, the addition wraps around and offset == 0, so
+  * the store may alias them. They could only be combined if
+  * nir_alu_instr::no_unsigned_wrap were set on the offset arithmetic,
+  * which this test deliberately does not do. */
+ nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+ nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
+ create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
+ create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+ create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+ /* Nothing may be combined, so the pass must report no progress; this
+  * matches the unchanged load count asserted below. */
+ EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
+{
+ /* TODO: try to combine these loads.
+  * Same shape as ssbo_alias3, except that the final add of the offset is
+  * flagged no_unsigned_wrap. The test is disabled (DISABLED_ prefix). */
+ nir_ssa_def *index_base = nir_load_local_invocation_index(b);
+ nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
+ nir_alu_instr *offset_add = nir_instr_as_alu(offset->parent_instr);
+ offset_add->no_unsigned_wrap = true;
+ create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
+ create_store(nir_var_mem_ssbo, 0, 0, 0x2);
+ create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+ /* The surviving scalar load keeps the original offset and feeds both
+  * users. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 1);
+ ASSERT_EQ(merged->src[1].ssa, offset);
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x3]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
+{
+ /* Two loads from binding 0 at offset 0 with a store to binding 1 at
+  * offset 0 in between. No access flags are given, and the test expects
+  * the loads to be left alone. */
+ create_load(nir_var_mem_ssbo, 0, 0, 0x1);
+ create_store(nir_var_mem_ssbo, 1, 0, 0x2);
+ create_load(nir_var_mem_ssbo, 0, 0, 0x3);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+ const bool progress = run_vectorizer(nir_var_mem_ssbo);
+ EXPECT_FALSE(progress);
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
+{
+ /* Same layout as ssbo_alias5 (loads from binding 0, store to binding 1),
+  * but every access carries ACCESS_RESTRICT. With that flag the test
+  * expects the two loads to be merged. */
+ create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
+ create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
+ create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
+
+ /* The surviving scalar load at offset 0 feeds both users. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 1);
+ ASSERT_EQ(nir_src_as_uint(merged->src[1]), 0);
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x3]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
+{
+ /* TODO: implement type-based alias analysis so that these loads can be
+  * combined. This is a bit harder than simply using nir_compare_derefs()
+  * because the vectorizer creates loads/stores with casted derefs. The
+  * solution would probably be to keep multiple derefs for an entry (one
+  * for each load/store combined into it). */
+ glsl_struct_field fields[2] = {
+  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
+  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
+
+ const struct glsl_type *struct_type = glsl_struct_type(fields, 2, "Struct", false);
+ nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, struct_type, "var");
+ nir_deref_instr *var_deref = nir_build_deref_var(b, var);
+
+ /* The loads index field0 and the store indexes field1, each with an
+  * unrelated dynamic index. */
+ nir_ssa_def *index0 = nir_load_local_invocation_index(b);
+ nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32);
+ nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, var_deref, 0), index0);
+
+ create_shared_load(load_deref, 0x1);
+ nir_deref_instr *store_field = nir_build_deref_struct(b, var_deref, 1);
+ create_shared_store(nir_build_deref_array(b, store_field, index1), 0x2);
+ create_shared_load(load_deref, 0x3);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+ /* The surviving scalar load uses the original deref and feeds both
+  * users. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_deref, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 1);
+ ASSERT_EQ(merged->src[0].ssa, &load_deref->dest.ssa);
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x3]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, shared_alias1)
+{
+ /* Two loads of shared variable var0 with a store to a different shared
+  * variable (var1) in between; the loads are expected to be merged. */
+ nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
+ nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
+ nir_deref_instr *load_deref = nir_build_deref_var(b, var0);
+
+ create_shared_load(load_deref, 0x1);
+ nir_deref_instr *store_deref = nir_build_deref_var(b, var1);
+ create_shared_store(store_deref, 0x2);
+ create_shared_load(load_deref, 0x3);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+
+ /* The surviving scalar load uses the original var0 deref and feeds both
+  * users. */
+ nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_deref, 0);
+ nir_ssa_def *merged_def = &merged->dest.ssa;
+ ASSERT_EQ(merged_def->bit_size, 32);
+ ASSERT_EQ(merged_def->num_components, 1);
+ ASSERT_EQ(merged->src[0].ssa, &load_deref->dest.ssa);
+ ASSERT_EQ(loads[0x1]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x3]->src.ssa, merged_def);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x3]->swizzle[0], 0);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
+{
+ /* Two SSBO loads with 64-bit constant offsets 0x100000000 and
+  * 0x200000004. Their low 32 bits (0 and 4) look adjacent, but the full
+  * 64-bit offsets are far apart, so the loads must not be merged. */
+ create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x100000000, 64), 0x1);
+ create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x200000004, 64), 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+ /* No merge is expected, so the pass must report no progress; this
+  * matches the unchanged load count below and the equivalent
+  * shared_load_distant_64bit test. */
+ EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
+{
+ /* Two SSBO loads whose 64-bit offsets are index * 0x100000000 and
+  * index * 0x200000000, where index is the local invocation index widened
+  * to 64 bits. The multipliers only differ above bit 31, so the loads are
+  * not adjacent and must not be merged. */
+ nir_ssa_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
+ nir_ssa_def *first = nir_imul_imm(b, index_base, 0x100000000);
+ nir_ssa_def *second = nir_imul_imm(b, index_base, 0x200000000);
+ create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
+ create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+ /* No merge is expected, so the pass must report no progress; this
+  * matches the unchanged load count asserted below. */
+ EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}