From: Rhys Perry Date: Mon, 2 Sep 2019 15:09:24 +0000 (+0100) Subject: nir: add load/store vectorizer tests X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0a759c3be6c88fbdb945d823516172a9867836f8;p=mesa.git nir: add load/store vectorizer tests v7: run nir_opt_algebraic v9: rework the callback function v9: update alignment on all loads/stores, even if they're not vectorized v10: add tests for 64-bit offsets v10: add tests for signed offsets Signed-off-by: Rhys Perry Reviewed-by: Connor Abbott (v9) --- diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index 9c42c413049..ee197ea74fb 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -336,4 +336,16 @@ if with_tests ), suite : ['compiler', 'nir'], ) + + test( + 'load_store_vectorizer', + executable( + 'load_store_vectorizer', + files('tests/load_store_vectorizer_tests.cpp'), + cpp_args : [cpp_vis_args, cpp_msvc_compat_args], + include_directories : [inc_common], + dependencies : [dep_thread, idep_gtest, idep_nir, idep_mesautil], + ), + suite : ['compiler', 'nir'], + ) endif diff --git a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp new file mode 100644 index 00000000000..2251b43fb61 --- /dev/null +++ b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp @@ -0,0 +1,1751 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all 
copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <gtest/gtest.h>
+
+#include "nir.h"
+#include "nir_builder.h"
+
+namespace {
+
+class nir_load_store_vectorize_test : public ::testing::Test { /* fixture: builds a compute shader, runs nir_opt_load_store_vectorize on it */
+protected:
+   nir_load_store_vectorize_test();
+   ~nir_load_store_vectorize_test();
+
+   unsigned count_intrinsics(nir_intrinsic_op intrinsic); /* number of intrinsics of this op in b->impl */
+
+   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
+                                      unsigned index); /* index-th match in instruction order, NULL if there are fewer */
+
+   bool run_vectorizer(nir_variable_mode modes, bool cse=false);
+
+   nir_ssa_def *get_resource(uint32_t binding, bool ssbo);
+
+   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
+                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
+                                             unsigned access=0);
+   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
+                              uint32_t id, unsigned bit_size=32, unsigned components=1,
+                              unsigned wrmask=0xf, unsigned access=0);
+
+   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
+                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
+                                    unsigned access=0);
+   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
+                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
+                     unsigned access=0);
+
+   void create_shared_load(nir_deref_instr *deref, uint32_t id,
+                           unsigned bit_size=32, unsigned components=1);
+   void create_shared_store(nir_deref_instr *deref, uint32_t id,
+                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);
+
+   bool test_alu(nir_instr *instr, nir_op op);
+   bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);
+
+   static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
+                                      unsigned num_components, unsigned high_offset,
+                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high);
+   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);
+
+   void *mem_ctx; /* ralloc context owning the builder; freed in the destructor */
+
+   nir_builder *b;
+   std::map<unsigned, nir_alu_src*> loads; /* load id -> source of the mov that consumes that load */
+   std::map<unsigned, nir_ssa_def*> res_map; /* binding -> cached vulkan_resource_index result */
+};
+
+nir_load_store_vectorize_test::nir_load_store_vectorize_test()
+{
+   glsl_type_singleton_init_or_ref();
+
+   mem_ctx = ralloc_context(NULL);
+   static const nir_shader_compiler_options options = { };
+   b = rzalloc(mem_ctx, nir_builder);
+   nir_builder_init_simple_shader(b, mem_ctx, MESA_SHADER_COMPUTE, &options);
+}
+
+nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
+{
+   if (HasFailure()) { /* dump the shader only when an assertion in the test failed */
+      printf("\nShader from the failed test:\n\n");
+      nir_print_shader(b->shader, stdout);
+   }
+
+   ralloc_free(mem_ctx);
+
+   glsl_type_singleton_decref();
+}
+
+unsigned
+nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
+{
+   unsigned count = 0;
+   nir_foreach_block(block, b->impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         if (intrin->intrinsic == intrinsic)
+            count++;
+      }
+   }
+   return count;
+}
+
+nir_intrinsic_instr *
+nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
+                                             unsigned index)
+{
+   nir_foreach_block(block, b->impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         if (intrin->intrinsic == intrinsic) {
+            if (index == 0)
+               return intrin;
+            index--; /* skip earlier matches until the requested one is reached */
+         }
+      }
+   }
+   return NULL;
+}
+
+bool
+nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes, bool
cse) +{ + if (modes & nir_var_mem_shared) + nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info); + bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback); + if (progress) { + nir_validate_shader(b->shader, NULL); + if (cse) + nir_opt_cse(b->shader); + nir_copy_prop(b->shader); + nir_opt_algebraic(b->shader); + nir_opt_constant_folding(b->shader); + } + return progress; +} + +nir_ssa_def * +nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo) +{ + if (res_map.count(binding)) + return res_map[binding]; + + nir_intrinsic_instr *res = nir_intrinsic_instr_create( + b->shader, nir_intrinsic_vulkan_resource_index); + nir_ssa_dest_init(&res->instr, &res->dest, 1, 32, NULL); + res->num_components = 1; + res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32)); + nir_intrinsic_set_desc_type( + res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/); + nir_intrinsic_set_desc_set(res, 0); + nir_intrinsic_set_binding(res, binding); + nir_builder_instr_insert(b, &res->instr); + res_map[binding] = &res->dest.ssa; + return &res->dest.ssa; +} + +nir_intrinsic_instr * +nir_load_store_vectorize_test::create_indirect_load( + nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id, + unsigned bit_size, unsigned components, unsigned access) +{ + nir_intrinsic_op intrinsic; + nir_ssa_def *res = NULL; + switch (mode) { + case nir_var_mem_ubo: + intrinsic = nir_intrinsic_load_ubo; + res = get_resource(binding, false); + break; + case nir_var_mem_ssbo: + intrinsic = nir_intrinsic_load_ssbo; + res = get_resource(binding, true); + break; + case nir_var_mem_push_const: + intrinsic = nir_intrinsic_load_push_constant; + break; + default: + return NULL; + } + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic); + nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL); + load->num_components = components; + if (res) { + 
load->src[0] = nir_src_for_ssa(res); + load->src[1] = nir_src_for_ssa(offset); + } else { + load->src[0] = nir_src_for_ssa(offset); + } + if (mode != nir_var_mem_push_const) { + nir_intrinsic_set_align(load, (bit_size == 1 ? 32 : bit_size) / 8, 0); + nir_intrinsic_set_access(load, (gl_access_qualifier)access); + } + nir_builder_instr_insert(b, &load->instr); + nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr; + loads[id] = &nir_instr_as_alu(mov)->src[0]; + + return load; +} + +void +nir_load_store_vectorize_test::create_indirect_store( + nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id, + unsigned bit_size, unsigned components, unsigned wrmask, unsigned access) +{ + nir_const_value values[NIR_MAX_VEC_COMPONENTS]; + for (unsigned i = 0; i < components; i++) + values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size); + nir_ssa_def *value = nir_build_imm(b, components, bit_size, values); + + nir_intrinsic_op intrinsic; + nir_ssa_def *res = NULL; + switch (mode) { + case nir_var_mem_ssbo: + intrinsic = nir_intrinsic_store_ssbo; + res = get_resource(binding, true); + break; + case nir_var_mem_shared: + intrinsic = nir_intrinsic_store_shared; + break; + default: + return; + } + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic); + nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL); + store->num_components = components; + if (res) { + store->src[0] = nir_src_for_ssa(value); + store->src[1] = nir_src_for_ssa(res); + store->src[2] = nir_src_for_ssa(offset); + } else { + store->src[0] = nir_src_for_ssa(value); + store->src[1] = nir_src_for_ssa(offset); + } + nir_intrinsic_set_align(store, (bit_size == 1 ? 
32 : bit_size) / 8, 0); + nir_intrinsic_set_access(store, (gl_access_qualifier)access); + nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1)); + nir_builder_instr_insert(b, &store->instr); +} + +nir_intrinsic_instr * +nir_load_store_vectorize_test::create_load( + nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id, + unsigned bit_size, unsigned components, unsigned access) +{ + return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access); +} + +void +nir_load_store_vectorize_test::create_store( + nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id, + unsigned bit_size, unsigned components, unsigned wrmask, unsigned access) +{ + create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access); +} + +void nir_load_store_vectorize_test::create_shared_load( + nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components) +{ + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref); + nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL); + load->num_components = components; + load->src[0] = nir_src_for_ssa(&deref->dest.ssa); + nir_builder_instr_insert(b, &load->instr); + nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr; + loads[id] = &nir_instr_as_alu(mov)->src[0]; +} + +void nir_load_store_vectorize_test::create_shared_store( + nir_deref_instr *deref, uint32_t id, + unsigned bit_size, unsigned components, unsigned wrmask) +{ + nir_const_value values[NIR_MAX_VEC_COMPONENTS]; + for (unsigned i = 0; i < components; i++) + values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size); + nir_ssa_def *value = nir_build_imm(b, components, bit_size, values); + + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_deref); + nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL); + store->num_components = 
components;
+   store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+   store->src[1] = nir_src_for_ssa(value);
+   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
+   nir_builder_instr_insert(b, &store->instr);
+}
+
+bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
+{
+   return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
+}
+
+bool nir_load_store_vectorize_test::test_alu_def(
+   nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
+{
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+   if (index >= nir_op_infos[alu->op].num_inputs)
+      return false;
+   if (alu->src[index].src.ssa != def)
+      return false;
+   if (alu->src[index].swizzle[0] != swizzle) /* only the first swizzle component is compared */
+      return false;
+
+   return true;
+}
+
+bool nir_load_store_vectorize_test::mem_vectorize_callback(
+   unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
+   nir_intrinsic_instr *low, nir_intrinsic_instr *high)
+{
+   return bit_size >= 8; /* same result as the old implicit (bit_size / 8) != 0: accept any access of at least one byte */
+}
+
+void nir_load_store_vectorize_test::shared_type_info(
+   const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+   assert(glsl_type_is_vector_or_scalar(type));
+
+   uint32_t comp_size = glsl_type_is_boolean(type)
+      ? 4 : glsl_get_bit_size(type) / 8; /* booleans are given 4 bytes per component */
+   unsigned length = glsl_get_vector_elements(type);
+   *size = comp_size * length; /* was terminated by ',' and only worked through the comma operator */
+   *align = comp_size;
+}
+} // namespace
+
+TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1);
+   create_load(nir_var_mem_ubo, 0, 4, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 2);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
+TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
+   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+
+   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
+   ASSERT_EQ(load->dest.ssa.bit_size, 32);
+   ASSERT_EQ(load->dest.ssa.num_components, 3);
+   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
+   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+   ASSERT_EQ(loads[0x2]->swizzle[1], 2);
+}
+
+TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
+{
+   create_load(nir_var_mem_ubo, 0, 0, 0x1);
+   create_load(nir_var_mem_ubo, 0, 0, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 1); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 0); +} + +TEST_F(nir_load_store_vectorize_test, ubo_load_large) +{ + create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2); + create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo)); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2); +} + +TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent) +{ + create_load(nir_var_mem_push_const, 0, 0, 0x1); + create_load(nir_var_mem_push_const, 0, 4, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + ASSERT_EQ(nir_src_as_uint(load->src[0]), 0); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base) +{ + create_load(nir_var_mem_push_const, 0, 0, 0x1); + nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4); + + 
nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + ASSERT_EQ(nir_src_as_uint(load->src[0]), 0); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent) +{ + create_load(nir_var_mem_ssbo, 0, 0, 0x1); + create_load(nir_var_mem_ssbo, 0, 4, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect) +{ + nir_ssa_def *index_base = nir_load_local_invocation_index(b); + create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1); + create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + 
ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + ASSERT_EQ(load->src[1].ssa, index_base); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub) +{ + nir_ssa_def *index_base = nir_load_local_invocation_index(b); + nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc); + create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1); + create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + ASSERT_EQ(load->src[1].ssa, index_base_prev); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride) +{ + nir_ssa_def *inv = nir_load_local_invocation_index(b); + nir_ssa_def *inv_plus_one = nir_iadd_imm(b, inv, 1); + nir_ssa_def *index_base = nir_imul_imm(b, inv, 0xfffffffc); + nir_ssa_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc); + create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1); + create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = 
get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); + + /* nir_opt_algebraic optimizes the imul */ + ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg)); + nir_ssa_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa; + ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl)); + nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr); + ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one); + ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent) +{ + create_load(nir_var_mem_ssbo, 0, 0, 0x1); + create_store(nir_var_mem_ssbo, 0, 4, 0x2); + create_load(nir_var_mem_ssbo, 0, 0, 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 1); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x3]->swizzle[0], 0); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting) +{ + create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2); + create_store(nir_var_mem_ssbo, 0, 4, 0x2); + create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo)); + + 
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical) +{ + create_load(nir_var_mem_ssbo, 0, 0, 0x1); + create_store(nir_var_mem_ssbo, 0, 0, 0x2); + create_load(nir_var_mem_ssbo, 0, 0, 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); +} + +/* if nir_opt_load_store_vectorize were implemented like many load/store + * optimization passes are (for example, nir_opt_combine_stores and + * nir_opt_copy_prop_vars) and stopped tracking a load when an aliasing store is + * encountered, this case wouldn't be optimized. + * A similar test for derefs is shared_load_adjacent_store_identical. */ +TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical) +{ + create_load(nir_var_mem_ssbo, 0, 0, 0x1); + create_store(nir_var_mem_ssbo, 0, 0, 0x2); + create_load(nir_var_mem_ssbo, 0, 4, 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x3]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent) +{ + create_store(nir_var_mem_ssbo, 0, 0, 0x1); + create_store(nir_var_mem_ssbo, 0, 4, 0x2); + + nir_validate_shader(b->shader, NULL); + 
ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1); + + nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0); + ASSERT_EQ(nir_src_as_uint(store->src[2]), 0); + ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3); + nir_ssa_def *val = store->src[0].ssa; + ASSERT_EQ(val->bit_size, 32); + ASSERT_EQ(val->num_components, 2); + nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value; + ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10); + ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting) +{ + create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2); + create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1); + + nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0); + ASSERT_EQ(nir_src_as_uint(store->src[2]), 0); + ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7); + nir_ssa_def *val = store->src[0].ssa; + ASSERT_EQ(val->bit_size, 32); + ASSERT_EQ(val->num_components, 3); + nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value; + ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10); + ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20); + ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_store_identical) +{ + create_store(nir_var_mem_ssbo, 0, 0, 0x1); + create_store(nir_var_mem_ssbo, 0, 0, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1); + + nir_intrinsic_instr *store = 
get_intrinsic(nir_intrinsic_store_ssbo, 0); + ASSERT_EQ(nir_src_as_uint(store->src[2]), 0); + ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1); + nir_ssa_def *val = store->src[0].ssa; + ASSERT_EQ(val->bit_size, 32); + ASSERT_EQ(val->num_components, 1); + ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_store_large) +{ + create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2); + create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2); +} + +TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier) +{ + create_load(nir_var_mem_ubo, 0, 0, 0x1); + nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier)->instr); + create_load(nir_var_mem_ubo, 0, 4, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier) +{ + create_load(nir_var_mem_ssbo, 0, 0, 0x1); + nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier)->instr); + create_load(nir_var_mem_ssbo, 0, 4, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); +} + +/* nir_intrinsic_barrier only syncs invocations in a workgroup, it doesn't + * require that loads/stores complete. 
*/ +TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier) +{ + create_load(nir_var_mem_ssbo, 0, 0, 0x1); + nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_barrier)->instr); + create_load(nir_var_mem_ssbo, 0, 4, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared) +{ + create_load(nir_var_mem_ssbo, 0, 0, 0x1); + nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier_shared)->instr); + create_load(nir_var_mem_ssbo, 0, 4, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16) +{ + create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8); + create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8); + create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 8); + ASSERT_EQ(load->dest.ssa.num_components, 4); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); + + nir_ssa_def *val = loads[0x3]->src.ssa; + ASSERT_EQ(val->bit_size, 16); + ASSERT_EQ(val->num_components, 1); + ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior)); + nir_ssa_def *low = 
nir_instr_as_alu(val->parent_instr)->src[0].src.ssa; + nir_ssa_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa; + ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl)); + high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa; + ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16)); + ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16)); + ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->dest.ssa, 2)); + ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->dest.ssa, 3)); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64) +{ /* A 2x32-bit load at offset 0 and a 64-bit load at offset 8 are expected to merge into one 4x32-bit load; the 64-bit value is rebuilt by pack_64_2x32 from components 2 and 3. */ + create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2); + create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 4); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x1]->swizzle[1], 1); + + nir_ssa_def *val = loads[0x2]->src.ssa; + ASSERT_EQ(val->bit_size, 64); + ASSERT_EQ(val->num_components, 1); + ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32)); + nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr); + ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa); + ASSERT_EQ(pack->src[0].swizzle[0], 2); + ASSERT_EQ(pack->src[0].swizzle[1], 3); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64) +{ /* Same as ssbo_load_adjacent_32_32_64 plus a second 64-bit load at offset 16, run with cse=true; expects a single 3x64-bit load. */ + create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2); + create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64); + create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true)); + 
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 64); + ASSERT_EQ(load->dest.ssa.num_components, 3); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); + ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x3]->swizzle[0], 2); + + /* pack_64_2x32(unpack_64_2x32()) is created because the 32-bit and first + * 64-bit loads are combined before the second 64-bit load is even considered. */ + nir_ssa_def *val = loads[0x2]->src.ssa; + ASSERT_EQ(val->bit_size, 64); + ASSERT_EQ(val->num_components, 1); + ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32)); + nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr); + ASSERT_TRUE(test_alu(pack->src[0].src.ssa->parent_instr, nir_op_unpack_64_2x32)); + nir_alu_instr *unpack = nir_instr_as_alu(pack->src[0].src.ssa->parent_instr); + ASSERT_EQ(unpack->src[0].src.ssa, &load->dest.ssa); + ASSERT_EQ(unpack->src[0].swizzle[0], 1); + /* The original 2x32-bit value is expected to be an unpack_64_2x32 of component 0 of the merged load. */ + val = loads[0x1]->src.ssa; + ASSERT_EQ(val->bit_size, 32); + ASSERT_EQ(val->num_components, 2); + ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32)); + unpack = nir_instr_as_alu(val->parent_instr); + ASSERT_EQ(unpack->src[0].src.ssa, &load->dest.ssa); + ASSERT_EQ(unpack->src[0].swizzle[0], 0); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64) +{ /* Overlapping loads: a 2x32-bit load at offset 4 and a 64-bit load at offset 8 are expected to become one 3x32-bit load at offset 4. */ + create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2); + create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 3); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 4); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + 
ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x1]->swizzle[1], 1); + + nir_ssa_def *val = loads[0x2]->src.ssa; + ASSERT_EQ(val->bit_size, 64); + ASSERT_EQ(val->num_components, 1); + ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32)); + nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr); + ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa); + ASSERT_EQ(pack->src[0].swizzle[0], 1); + ASSERT_EQ(pack->src[0].swizzle[1], 2); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16) +{ /* Two 8-bit stores (offsets 0 and 1) and a 16-bit store (offset 2) are expected to merge into one 4x8-bit store with write mask 0xf. */ + create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8); + create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8); + create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1); + + nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0); + ASSERT_EQ(nir_src_as_uint(store->src[2]), 0); + ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf); + nir_ssa_def *val = store->src[0].ssa; + ASSERT_EQ(val->bit_size, 8); + ASSERT_EQ(val->num_components, 4); + nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value; + ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10); + ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20); + ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x30); + ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64) +{ /* A 2x32-bit store at offset 0 and a 64-bit store at offset 8 are expected to merge into one 4x32-bit store with write mask 0xf. */ + create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2); + create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1); + + nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0); + ASSERT_EQ(nir_src_as_uint(store->src[2]), 0); + 
ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf); + nir_ssa_def *val = store->src[0].ssa; + ASSERT_EQ(val->bit_size, 32); + ASSERT_EQ(val->num_components, 4); + nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value; + ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10); + ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11); + ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20); + ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64) +{ /* Same as ssbo_store_adjacent_32_32_64 plus a second 64-bit store at offset 16; expects one 3x64-bit store with write mask 0x7. */ + create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2); + create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64); + create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1); + + nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0); + ASSERT_EQ(nir_src_as_uint(store->src[2]), 0); + ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7); + nir_ssa_def *val = store->src[0].ssa; + ASSERT_EQ(val->bit_size, 64); + ASSERT_EQ(val->num_components, 3); + nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value; + ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull); + ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20); + ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64) +{ /* Overlapping stores: a 2x32-bit store at offset 0 followed by a 64-bit store at offset 4; expects one 3x32-bit store in which component 1 holds 0x20, i.e. the later store's data. */ + create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2); + create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1); + + nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0); + ASSERT_EQ(nir_src_as_uint(store->src[2]), 0); + 
ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7); + nir_ssa_def *val = store->src[0].ssa; + ASSERT_EQ(val->bit_size, 32); + ASSERT_EQ(val->num_components, 3); + nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value; + ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10); + ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20); + ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64) +{ /* A 32-bit store at offset 0 followed by a 2x64-bit store at offset 4 is expected to be left alone: no progress and still two stores. */ + create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32); + create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask) +{ /* Two 4x32-bit stores to the same offset with write masks 1|4 and 2|4|8; expects one store with mask 0xf where component 2, written by both, takes the second store's value (0x22). */ + create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4); + create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1); + + nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0); + ASSERT_EQ(nir_src_as_uint(store->src[2]), 0); + ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf); + nir_ssa_def *val = store->src[0].ssa; + ASSERT_EQ(val->bit_size, 32); + ASSERT_EQ(val->num_components, 4); + nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value; + ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10); + ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21); + ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22); + ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23); +} + +TEST_F(nir_load_store_vectorize_test, shared_load_adjacent) +{ /* Loads of var[0] and var[1] from a shared uint[4] are expected to merge into one 2x32-bit load_deref whose source is a cast of the var[0] array deref. */ + nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var"); + nir_deref_instr *deref = 
nir_build_deref_var(b, var); + + create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1); + create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + + deref = nir_src_as_deref(load->src[0]); + ASSERT_EQ(deref->deref_type, nir_deref_type_cast); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_array); + ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_var); + ASSERT_EQ(deref->var, var); + + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit) +{ /* The variable deref is re-initialised with a 64-bit destination so the array indices 0x100000000 and 0x200000001 are 64-bit; these far-apart loads are expected not to be vectorized. */ + nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var"); + nir_deref_instr *deref = nir_build_deref_var(b, var); + nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 64, NULL); + + create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1); + create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); +} + +TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect) +{ /* Loads at indices index_base and index_base + 1 are expected to merge into one 2x32-bit load based at index_base. */ + nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var"); + 
nir_deref_instr *deref = nir_build_deref_var(b, var); + nir_ssa_def *index_base = nir_load_local_invocation_index(b); + + create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1); + create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + + deref = nir_src_as_deref(load->src[0]); + ASSERT_EQ(deref->deref_type, nir_deref_type_cast); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_array); + ASSERT_EQ(deref->arr.index.ssa, index_base); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_var); + ASSERT_EQ(deref->var, var); + + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub) +{ /* Like shared_load_adjacent_indirect, but the lower index is index_base + 0xffffffff (presumably index_base - 1 with 32-bit wraparound - confirm); the merged load is expected to be based at that lower index. */ + nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var"); + nir_deref_instr *deref = nir_build_deref_var(b, var); + nir_ssa_def *index_base = nir_load_local_invocation_index(b); + nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff); + + create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1); + create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); + + 
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + + deref = nir_src_as_deref(load->src[0]); + ASSERT_EQ(deref->deref_type, nir_deref_type_cast); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_array); + ASSERT_EQ(deref->arr.index.ssa, index_base_prev); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_var); + ASSERT_EQ(deref->var, var); + + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, shared_load_struct) +{ /* Loads of struct member field0 (uint) and field1[0] are expected to merge into one 2x32-bit load based at the field0 struct deref. */ + glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"), + glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")}; + + nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var"); + nir_deref_instr *deref = nir_build_deref_var(b, var); + + create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1); + create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + + deref = nir_src_as_deref(load->src[0]); + ASSERT_EQ(deref->deref_type, nir_deref_type_cast); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_struct); + ASSERT_EQ(deref->strct.index, 0); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_var); + 
ASSERT_EQ(deref->var, var); + + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent) +{ /* Two loads of var[0] with a store to var[1] in between: the loads are expected to collapse into a single 1-component load while the store is kept. */ + nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var"); + nir_deref_instr *deref = nir_build_deref_var(b, var); + + create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1); + create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2); + create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 1); + + deref = nir_src_as_deref(load->src[0]); + ASSERT_EQ(deref->deref_type, nir_deref_type_array); + ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_var); + ASSERT_EQ(deref->var, var); + + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x3]->swizzle[0], 0); +} + +TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical) +{ /* Two loads of var[0] with a store to the same element var[0] in between: expects no progress and both loads kept. */ + nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var"); + nir_deref_instr *deref = nir_build_deref_var(b, var); + + create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1); + 
create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2); + create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); +} + +TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical) +{ /* Loads of var[0] and var[1] with a store to var[0] in between: expects a single 2x32-bit load, with the store still present. */ + nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var"); + nir_deref_instr *deref = nir_build_deref_var(b, var); + + create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1); + create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2); + create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + + deref = nir_src_as_deref(load->src[0]); + ASSERT_EQ(deref->deref_type, nir_deref_type_cast); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_array); + ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_var); + ASSERT_EQ(deref->var, var); + + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x3]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, shared_load_bool) +{ /* 1-bit loads of elements 0 and 1 of a shared bool[4] are expected to merge into one 2x32-bit load, with each original value recovered through i2b1 of its component. */ + nir_variable *var = nir_variable_create(b->shader, 
nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var"); + nir_deref_instr *deref = nir_build_deref_var(b, var); + + create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1); + create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + + deref = nir_src_as_deref(load->src[0]); + ASSERT_EQ(deref->deref_type, nir_deref_type_cast); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_array); + ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_var); + ASSERT_EQ(deref->var, var); + + ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1)); + ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_i2b1)); + ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0)); + ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->dest.ssa, 1)); +} + +TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed) +{ /* A 1-bit load of a bool struct member and a 32-bit load of field1[0] are expected to merge into one 2x32-bit load; the bool is recovered via i2b1 of component 0 and the uint uses component 1 directly. */ + glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"), + glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")}; + + nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var"); + nir_deref_instr *deref = nir_build_deref_var(b, var); + + create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1); + create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2); + + nir_validate_shader(b->shader, NULL); + 
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + + deref = nir_src_as_deref(load->src[0]); + ASSERT_EQ(deref->deref_type, nir_deref_type_cast); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_struct); + ASSERT_EQ(deref->strct.index, 0); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_var); + ASSERT_EQ(deref->var, var); + + ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1)); + ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0)); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, shared_store_adjacent) +{ /* Stores to var[0] and var[1] of a shared uint[4] are expected to merge into one 2-component store_deref with write mask 0x3, addressed through a cast of the var[0] deref. */ + nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var"); + nir_deref_instr *deref = nir_build_deref_var(b, var); + + create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1); + create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1); + + nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0); + ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3); + nir_ssa_def *val = store->src[1].ssa; + ASSERT_EQ(val->bit_size, 32); + ASSERT_EQ(val->num_components, 2); + nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value; + ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10); + ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20); + + deref = 
nir_src_as_deref(store->src[0]); + ASSERT_EQ(deref->deref_type, nir_deref_type_cast); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_array); + ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0); + + deref = nir_deref_instr_parent(deref); + ASSERT_EQ(deref->deref_type, nir_deref_type_var); + ASSERT_EQ(deref->var, var); +} + +TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base) +{ /* Push-constant loads at offsets 0 and 4 where the second load additionally has its base index set to 4; expects no vectorization. */ + create_load(nir_var_mem_push_const, 0, 0, 0x1); + nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2); +} + +TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct) +{ /* Default (1x32-bit) push-constant loads at constant offsets 0 and 8 are not adjacent; expects no vectorization. */ + create_load(nir_var_mem_push_const, 0, 0, 0x1); + create_load(nir_var_mem_push_const, 0, 8, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2); +} + +TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect) +{ /* One constant-offset load and one load whose offset is the local invocation index; expects no vectorization. */ + nir_ssa_def *index_base = nir_load_local_invocation_index(b); + create_load(nir_var_mem_push_const, 0, 0, 0x1); + create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2); +} + +TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect) +{ /* Offsets (index_base + 2) * 16 + 32 and (index_base + 3) * 16 + 32 differ by 16 bytes, so the two 32-bit loads are not adjacent; expects no vectorization. */ + nir_ssa_def *index_base = nir_load_local_invocation_index(b); + create_indirect_load(nir_var_mem_push_const, 0, + nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, 
nir_imm_int(b, 2)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x1); + create_indirect_load(nir_var_mem_push_const, 0, + nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 3)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2); +} + +TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect) +{ /* Offsets index_base * 16 + 12 and (index_base + 1) * 16 are 4 bytes apart; expects one 2x32-bit load that uses the lower offset expression. */ + nir_ssa_def *index_base = nir_load_local_invocation_index(b); + //vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x; + nir_ssa_def *low = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 12)); + nir_ssa_def *high = nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 1)), nir_imm_int(b, 16)); + create_indirect_load(nir_var_mem_push_const, 0, low, 0x1); + create_indirect_load(nir_var_mem_push_const, 0, high, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 2); + ASSERT_EQ(load->src[0].ssa, low); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_alias0) +{ /* Two loads at constant offset 0 separated by a store at an indirect offset in the same binding; expects no progress and both loads kept. */ + nir_ssa_def *index_base = nir_load_local_invocation_index(b); + create_load(nir_var_mem_ssbo, 0, 0, 0x1); + create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2); + create_load(nir_var_mem_ssbo, 0, 0, 0x3); + + nir_validate_shader(b->shader, NULL); + 
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_alias1) +{ /* Two loads at one indirect offset separated by a store at a different, unrelated indirect offset; expects no progress and both loads kept. */ + nir_ssa_def *load_base = nir_load_global_invocation_index(b, 32); + nir_ssa_def *store_base = nir_load_local_invocation_index(b); + create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1); + create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2); + create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); +} + +TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2) +{ + /* TODO: try to combine these loads. Both loads use offset index_base * 16 + 4 and are separated by a store at constant offset 0. */ + nir_ssa_def *index_base = nir_load_local_invocation_index(b); + nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 4)); + create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1); + create_store(nir_var_mem_ssbo, 0, 0, 0x2); + create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 1); + ASSERT_EQ(load->src[1].ssa, offset); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x3]->swizzle[0], 0); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_alias3) +{ + /* These loads could only be combined if nir_alu_instr::no_unsigned_wrap were set on the offset arithmetic. 
+ * As built here they can't be combined: if index_base == 268435455, then + * offset == 0 because the addition would wrap around, so the store at offset 0 may alias both loads. + * NOTE(review): the load count must stay at 2, yet run_vectorizer() is expected to report progress; + * presumably this comes from a non-merging change such as an alignment update - confirm. */ + nir_ssa_def *index_base = nir_load_local_invocation_index(b); + nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16)); + create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1); + create_store(nir_var_mem_ssbo, 0, 0, 0x2); + create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); +} + +TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4) +{ + /* TODO: try to combine these loads. Same setup as ssbo_alias3, but no_unsigned_wrap is set on the final iadd of the offset. */ + nir_ssa_def *index_base = nir_load_local_invocation_index(b); + nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16)); + nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true; + create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1); + create_store(nir_var_mem_ssbo, 0, 0, 0x2); + create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 1); + ASSERT_EQ(load->src[1].ssa, offset); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x3]->swizzle[0], 0); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_alias5) +{ /* Two loads from binding 0 at offset 0 separated by a store to binding 1, with no ACCESS_RESTRICT passed; expects no progress and both loads kept. */ + create_load(nir_var_mem_ssbo, 0, 0, 0x1); + create_store(nir_var_mem_ssbo, 1, 0, 0x2); + create_load(nir_var_mem_ssbo, 0, 0, 0x3); + + 
nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_alias6) +{ /* Same accesses as ssbo_alias5 but each one is created with ACCESS_RESTRICT; the two loads are expected to collapse into one. */ + create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT); + create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT); + create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 1); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x3]->swizzle[0], 0); +} + +TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0) +{ + /* TODO: implement type-based alias analysis so that these loads can be + * combined. this is made a bit more difficult than simply using + * nir_compare_derefs() because the vectorizer creates loads/stores with + * casted derefs. The solution would probably be to keep multiple derefs for + * an entry (one for each load/store combined into it). 
*/ + glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"), + glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")}; + + nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var"); + nir_deref_instr *deref = nir_build_deref_var(b, var); + + nir_ssa_def *index0 = nir_load_local_invocation_index(b); + nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32); + nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0); + /* Two loads of field0[index0] with a store to field1[index1] (a different struct member) in between. */ + create_shared_load(load_deref, 0x1); + create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2); + create_shared_load(load_deref, 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 1); + ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x3]->swizzle[0], 0); +} + +TEST_F(nir_load_store_vectorize_test, shared_alias1) +{ /* Two loads of shared variable var0 separated by a store to a different variable var1; the loads are expected to collapse into one load of the original var0 deref. */ + nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0"); + nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1"); + nir_deref_instr *load_deref = nir_build_deref_var(b, var0); + + create_shared_load(load_deref, 0x1); + create_shared_store(nir_build_deref_var(b, var1), 0x2); + create_shared_load(load_deref, 0x3); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + 
EXPECT_TRUE(run_vectorizer(nir_var_mem_shared)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 1); + ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x3]->swizzle[0], 0); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit) +{ /* Loads at 64-bit constant offsets 0x100000000 and 0x200000004 are far apart, so the load count must stay at 2. NOTE(review): run_vectorizer() is nevertheless expected to report progress, presumably from a non-merging change such as an alignment update - confirm. */ + create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x100000000, 64), 0x1); + create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x200000004, 64), 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit) +{ /* Loads at 64-bit indirect offsets index_base * 0x100000000 and index_base * 0x200000000 must not be merged (count stays at 2). NOTE(review): progress is still expected to be reported, presumably from a non-merging change such as an alignment update - confirm. */ + nir_ssa_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b)); + nir_ssa_def *first = nir_imul_imm(b, index_base, 0x100000000); + nir_ssa_def *second = nir_imul_imm(b, index_base, 0x200000000); + create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1); + create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); +}