/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
24 #include <gtest/gtest.h>
27 #include "nir_builder.h"
31 class nir_load_store_vectorize_test
: public ::testing::Test
{
33 nir_load_store_vectorize_test();
34 ~nir_load_store_vectorize_test();
36 unsigned count_intrinsics(nir_intrinsic_op intrinsic
);
38 nir_intrinsic_instr
*get_intrinsic(nir_intrinsic_op intrinsic
,
41 bool run_vectorizer(nir_variable_mode modes
, bool cse
=false);
43 nir_ssa_def
*get_resource(uint32_t binding
, bool ssbo
);
45 nir_intrinsic_instr
*create_indirect_load(nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
,
46 uint32_t id
, unsigned bit_size
=32, unsigned components
=1,
48 void create_indirect_store(nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
,
49 uint32_t id
, unsigned bit_size
=32, unsigned components
=1,
50 unsigned wrmask
=0xf, unsigned access
=0);
52 nir_intrinsic_instr
*create_load(nir_variable_mode mode
, uint32_t binding
, uint32_t offset
,
53 uint32_t id
, unsigned bit_size
=32, unsigned components
=1,
55 void create_store(nir_variable_mode mode
, uint32_t binding
, uint32_t offset
,
56 uint32_t id
, unsigned bit_size
=32, unsigned components
=1, unsigned wrmask
=0xf,
59 void create_shared_load(nir_deref_instr
*deref
, uint32_t id
,
60 unsigned bit_size
=32, unsigned components
=1);
61 void create_shared_store(nir_deref_instr
*deref
, uint32_t id
,
62 unsigned bit_size
=32, unsigned components
=1, unsigned wrmask
=0xf);
64 bool test_alu(nir_instr
*instr
, nir_op op
);
65 bool test_alu_def(nir_instr
*instr
, unsigned index
, nir_ssa_def
*def
, unsigned swizzle
=0);
67 static bool mem_vectorize_callback(unsigned align
, unsigned bit_size
,
68 unsigned num_components
, unsigned high_offset
,
69 nir_intrinsic_instr
*low
, nir_intrinsic_instr
*high
);
70 static void shared_type_info(const struct glsl_type
*type
, unsigned *size
, unsigned *align
);
75 std::map
<unsigned, nir_alu_src
*> loads
;
76 std::map
<unsigned, nir_ssa_def
*> res_map
;
79 nir_load_store_vectorize_test::nir_load_store_vectorize_test()
81 glsl_type_singleton_init_or_ref();
83 mem_ctx
= ralloc_context(NULL
);
84 static const nir_shader_compiler_options options
= { };
85 b
= rzalloc(mem_ctx
, nir_builder
);
86 nir_builder_init_simple_shader(b
, mem_ctx
, MESA_SHADER_COMPUTE
, &options
);
89 nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
92 printf("\nShader from the failed test:\n\n");
93 nir_print_shader(b
->shader
, stdout
);
98 glsl_type_singleton_decref();
102 nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic
)
105 nir_foreach_block(block
, b
->impl
) {
106 nir_foreach_instr(instr
, block
) {
107 if (instr
->type
!= nir_instr_type_intrinsic
)
109 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
110 if (intrin
->intrinsic
== intrinsic
)
117 nir_intrinsic_instr
*
118 nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic
,
121 nir_foreach_block(block
, b
->impl
) {
122 nir_foreach_instr(instr
, block
) {
123 if (instr
->type
!= nir_instr_type_intrinsic
)
125 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
126 if (intrin
->intrinsic
== intrinsic
) {
137 nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes
, bool cse
)
139 if (modes
& nir_var_mem_shared
)
140 nir_lower_vars_to_explicit_types(b
->shader
, nir_var_mem_shared
, shared_type_info
);
141 bool progress
= nir_opt_load_store_vectorize(b
->shader
, modes
, mem_vectorize_callback
);
143 nir_validate_shader(b
->shader
, NULL
);
145 nir_opt_cse(b
->shader
);
146 nir_copy_prop(b
->shader
);
147 nir_opt_algebraic(b
->shader
);
148 nir_opt_constant_folding(b
->shader
);
154 nir_load_store_vectorize_test::get_resource(uint32_t binding
, bool ssbo
)
156 if (res_map
.count(binding
))
157 return res_map
[binding
];
159 nir_intrinsic_instr
*res
= nir_intrinsic_instr_create(
160 b
->shader
, nir_intrinsic_vulkan_resource_index
);
161 nir_ssa_dest_init(&res
->instr
, &res
->dest
, 1, 32, NULL
);
162 res
->num_components
= 1;
163 res
->src
[0] = nir_src_for_ssa(nir_imm_zero(b
, 1, 32));
164 nir_intrinsic_set_desc_type(
165 res
, ssbo
? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
166 nir_intrinsic_set_desc_set(res
, 0);
167 nir_intrinsic_set_binding(res
, binding
);
168 nir_builder_instr_insert(b
, &res
->instr
);
169 res_map
[binding
] = &res
->dest
.ssa
;
170 return &res
->dest
.ssa
;
173 nir_intrinsic_instr
*
174 nir_load_store_vectorize_test::create_indirect_load(
175 nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
, uint32_t id
,
176 unsigned bit_size
, unsigned components
, unsigned access
)
178 nir_intrinsic_op intrinsic
;
179 nir_ssa_def
*res
= NULL
;
181 case nir_var_mem_ubo
:
182 intrinsic
= nir_intrinsic_load_ubo
;
183 res
= get_resource(binding
, false);
185 case nir_var_mem_ssbo
:
186 intrinsic
= nir_intrinsic_load_ssbo
;
187 res
= get_resource(binding
, true);
189 case nir_var_mem_push_const
:
190 intrinsic
= nir_intrinsic_load_push_constant
;
195 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
->shader
, intrinsic
);
196 nir_ssa_dest_init(&load
->instr
, &load
->dest
, components
, bit_size
, NULL
);
197 load
->num_components
= components
;
199 load
->src
[0] = nir_src_for_ssa(res
);
200 load
->src
[1] = nir_src_for_ssa(offset
);
202 load
->src
[0] = nir_src_for_ssa(offset
);
204 if (mode
!= nir_var_mem_push_const
) {
205 nir_intrinsic_set_align(load
, (bit_size
== 1 ? 32 : bit_size
) / 8, 0);
206 nir_intrinsic_set_access(load
, (gl_access_qualifier
)access
);
208 nir_builder_instr_insert(b
, &load
->instr
);
209 nir_instr
*mov
= nir_mov(b
, &load
->dest
.ssa
)->parent_instr
;
210 loads
[id
] = &nir_instr_as_alu(mov
)->src
[0];
216 nir_load_store_vectorize_test::create_indirect_store(
217 nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
, uint32_t id
,
218 unsigned bit_size
, unsigned components
, unsigned wrmask
, unsigned access
)
220 nir_const_value values
[NIR_MAX_VEC_COMPONENTS
];
221 for (unsigned i
= 0; i
< components
; i
++)
222 values
[i
] = nir_const_value_for_raw_uint((id
<< 4) | i
, bit_size
);
223 nir_ssa_def
*value
= nir_build_imm(b
, components
, bit_size
, values
);
225 nir_intrinsic_op intrinsic
;
226 nir_ssa_def
*res
= NULL
;
228 case nir_var_mem_ssbo
:
229 intrinsic
= nir_intrinsic_store_ssbo
;
230 res
= get_resource(binding
, true);
232 case nir_var_mem_shared
:
233 intrinsic
= nir_intrinsic_store_shared
;
238 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
->shader
, intrinsic
);
239 nir_ssa_dest_init(&store
->instr
, &store
->dest
, components
, bit_size
, NULL
);
240 store
->num_components
= components
;
242 store
->src
[0] = nir_src_for_ssa(value
);
243 store
->src
[1] = nir_src_for_ssa(res
);
244 store
->src
[2] = nir_src_for_ssa(offset
);
246 store
->src
[0] = nir_src_for_ssa(value
);
247 store
->src
[1] = nir_src_for_ssa(offset
);
249 nir_intrinsic_set_align(store
, (bit_size
== 1 ? 32 : bit_size
) / 8, 0);
250 nir_intrinsic_set_access(store
, (gl_access_qualifier
)access
);
251 nir_intrinsic_set_write_mask(store
, wrmask
& ((1 << components
) - 1));
252 nir_builder_instr_insert(b
, &store
->instr
);
255 nir_intrinsic_instr
*
256 nir_load_store_vectorize_test::create_load(
257 nir_variable_mode mode
, uint32_t binding
, uint32_t offset
, uint32_t id
,
258 unsigned bit_size
, unsigned components
, unsigned access
)
260 return create_indirect_load(mode
, binding
, nir_imm_int(b
, offset
), id
, bit_size
, components
, access
);
264 nir_load_store_vectorize_test::create_store(
265 nir_variable_mode mode
, uint32_t binding
, uint32_t offset
, uint32_t id
,
266 unsigned bit_size
, unsigned components
, unsigned wrmask
, unsigned access
)
268 create_indirect_store(mode
, binding
, nir_imm_int(b
, offset
), id
, bit_size
, components
, wrmask
, access
);
271 void nir_load_store_vectorize_test::create_shared_load(
272 nir_deref_instr
*deref
, uint32_t id
, unsigned bit_size
, unsigned components
)
274 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_deref
);
275 nir_ssa_dest_init(&load
->instr
, &load
->dest
, components
, bit_size
, NULL
);
276 load
->num_components
= components
;
277 load
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
278 nir_builder_instr_insert(b
, &load
->instr
);
279 nir_instr
*mov
= nir_mov(b
, &load
->dest
.ssa
)->parent_instr
;
280 loads
[id
] = &nir_instr_as_alu(mov
)->src
[0];
283 void nir_load_store_vectorize_test::create_shared_store(
284 nir_deref_instr
*deref
, uint32_t id
,
285 unsigned bit_size
, unsigned components
, unsigned wrmask
)
287 nir_const_value values
[NIR_MAX_VEC_COMPONENTS
];
288 for (unsigned i
= 0; i
< components
; i
++)
289 values
[i
] = nir_const_value_for_raw_uint((id
<< 4) | i
, bit_size
);
290 nir_ssa_def
*value
= nir_build_imm(b
, components
, bit_size
, values
);
292 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_deref
);
293 nir_ssa_dest_init(&store
->instr
, &store
->dest
, components
, bit_size
, NULL
);
294 store
->num_components
= components
;
295 store
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
296 store
->src
[1] = nir_src_for_ssa(value
);
297 nir_intrinsic_set_write_mask(store
, wrmask
& ((1 << components
) - 1));
298 nir_builder_instr_insert(b
, &store
->instr
);
301 bool nir_load_store_vectorize_test::test_alu(nir_instr
*instr
, nir_op op
)
303 return instr
->type
== nir_instr_type_alu
&& nir_instr_as_alu(instr
)->op
== op
;
306 bool nir_load_store_vectorize_test::test_alu_def(
307 nir_instr
*instr
, unsigned index
, nir_ssa_def
*def
, unsigned swizzle
)
309 if (instr
->type
!= nir_instr_type_alu
)
312 nir_alu_instr
*alu
= nir_instr_as_alu(instr
);
314 if (index
>= nir_op_infos
[alu
->op
].num_inputs
)
316 if (alu
->src
[index
].src
.ssa
!= def
)
318 if (alu
->src
[index
].swizzle
[0] != swizzle
)
324 bool nir_load_store_vectorize_test::mem_vectorize_callback(
325 unsigned align
, unsigned bit_size
, unsigned num_components
, unsigned high_offset
,
326 nir_intrinsic_instr
*low
, nir_intrinsic_instr
*high
)
331 void nir_load_store_vectorize_test::shared_type_info(
332 const struct glsl_type
*type
, unsigned *size
, unsigned *align
)
334 assert(glsl_type_is_vector_or_scalar(type
));
336 uint32_t comp_size
= glsl_type_is_boolean(type
)
337 ? 4 : glsl_get_bit_size(type
) / 8;
338 unsigned length
= glsl_get_vector_elements(type
);
339 *size
= comp_size
* length
,
344 TEST_F(nir_load_store_vectorize_test
, ubo_load_adjacent
)
346 create_load(nir_var_mem_ubo
, 0, 0, 0x1);
347 create_load(nir_var_mem_ubo
, 0, 4, 0x2);
349 nir_validate_shader(b
->shader
, NULL
);
350 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
352 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
354 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
356 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ubo
, 0);
357 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
358 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
359 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
360 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
361 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
362 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
363 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
366 TEST_F(nir_load_store_vectorize_test
, ubo_load_intersecting
)
368 create_load(nir_var_mem_ubo
, 0, 0, 0x1, 32, 2);
369 create_load(nir_var_mem_ubo
, 0, 4, 0x2, 32, 2);
371 nir_validate_shader(b
->shader
, NULL
);
372 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
374 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
376 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
378 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ubo
, 0);
379 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
380 ASSERT_EQ(load
->dest
.ssa
.num_components
, 3);
381 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
382 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
383 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
384 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
385 ASSERT_EQ(loads
[0x1]->swizzle
[1], 1);
386 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
387 ASSERT_EQ(loads
[0x2]->swizzle
[1], 2);
390 TEST_F(nir_load_store_vectorize_test
, ubo_load_identical
)
392 create_load(nir_var_mem_ubo
, 0, 0, 0x1);
393 create_load(nir_var_mem_ubo
, 0, 0, 0x2);
395 nir_validate_shader(b
->shader
, NULL
);
396 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
398 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
400 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
402 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ubo
, 0);
403 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
404 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
405 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
406 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
407 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
408 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
409 ASSERT_EQ(loads
[0x2]->swizzle
[0], 0);
412 TEST_F(nir_load_store_vectorize_test
, ubo_load_large
)
414 create_load(nir_var_mem_ubo
, 0, 0, 0x1, 32, 2);
415 create_load(nir_var_mem_ubo
, 0, 8, 0x2, 32, 3);
417 nir_validate_shader(b
->shader
, NULL
);
418 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
420 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
422 nir_validate_shader(b
->shader
, NULL
);
423 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
426 TEST_F(nir_load_store_vectorize_test
, push_const_load_adjacent
)
428 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
429 create_load(nir_var_mem_push_const
, 0, 4, 0x2);
431 nir_validate_shader(b
->shader
, NULL
);
432 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
434 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const
));
436 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 1);
438 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_push_constant
, 0);
439 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
440 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
441 ASSERT_EQ(nir_src_as_uint(load
->src
[0]), 0);
442 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
443 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
444 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
445 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
448 TEST_F(nir_load_store_vectorize_test
, push_const_load_adjacent_base
)
450 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
451 nir_intrinsic_set_base(create_load(nir_var_mem_push_const
, 0, 0, 0x2), 4);
453 nir_validate_shader(b
->shader
, NULL
);
454 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
456 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const
));
458 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 1);
460 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_push_constant
, 0);
461 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
462 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
463 ASSERT_EQ(nir_src_as_uint(load
->src
[0]), 0);
464 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
465 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
466 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
467 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
470 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent
)
472 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
473 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
475 nir_validate_shader(b
->shader
, NULL
);
476 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
478 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
480 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
482 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
483 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
484 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
485 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
486 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
487 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
488 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
489 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
492 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_indirect
)
494 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
495 create_indirect_load(nir_var_mem_ssbo
, 0, index_base
, 0x1);
496 create_indirect_load(nir_var_mem_ssbo
, 0, nir_iadd_imm(b
, index_base
, 4), 0x2);
498 nir_validate_shader(b
->shader
, NULL
);
499 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
501 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
503 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
505 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
506 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
507 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
508 ASSERT_EQ(load
->src
[1].ssa
, index_base
);
509 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
510 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
511 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
512 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
515 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_indirect_sub
)
517 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
518 nir_ssa_def
*index_base_prev
= nir_iadd_imm(b
, index_base
, 0xfffffffc);
519 create_indirect_load(nir_var_mem_ssbo
, 0, index_base_prev
, 0x1);
520 create_indirect_load(nir_var_mem_ssbo
, 0, index_base
, 0x2);
522 nir_validate_shader(b
->shader
, NULL
);
523 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
525 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
527 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
529 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
530 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
531 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
532 ASSERT_EQ(load
->src
[1].ssa
, index_base_prev
);
533 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
534 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
535 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
536 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
539 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_indirect_neg_stride
)
541 nir_ssa_def
*inv
= nir_load_local_invocation_index(b
);
542 nir_ssa_def
*inv_plus_one
= nir_iadd_imm(b
, inv
, 1);
543 nir_ssa_def
*index_base
= nir_imul_imm(b
, inv
, 0xfffffffc);
544 nir_ssa_def
*index_base_prev
= nir_imul_imm(b
, inv_plus_one
, 0xfffffffc);
545 create_indirect_load(nir_var_mem_ssbo
, 0, index_base_prev
, 0x1);
546 create_indirect_load(nir_var_mem_ssbo
, 0, index_base
, 0x2);
548 nir_validate_shader(b
->shader
, NULL
);
549 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
551 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
553 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
555 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
556 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
557 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
558 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
559 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
560 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
561 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
563 /* nir_opt_algebraic optimizes the imul */
564 ASSERT_TRUE(test_alu(load
->src
[1].ssa
->parent_instr
, nir_op_ineg
));
565 nir_ssa_def
*offset
= nir_instr_as_alu(load
->src
[1].ssa
->parent_instr
)->src
[0].src
.ssa
;
566 ASSERT_TRUE(test_alu(offset
->parent_instr
, nir_op_ishl
));
567 nir_alu_instr
*shl
= nir_instr_as_alu(offset
->parent_instr
);
568 ASSERT_EQ(shl
->src
[0].src
.ssa
, inv_plus_one
);
569 ASSERT_EQ(nir_src_as_uint(shl
->src
[1].src
), 2);
572 TEST_F(nir_load_store_vectorize_test
, ssbo_load_identical_store_adjacent
)
574 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
575 create_store(nir_var_mem_ssbo
, 0, 4, 0x2);
576 create_load(nir_var_mem_ssbo
, 0, 0, 0x3);
578 nir_validate_shader(b
->shader
, NULL
);
579 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
581 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
583 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
585 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
586 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
587 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
588 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
589 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
590 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
591 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
592 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
595 TEST_F(nir_load_store_vectorize_test
, ssbo_load_identical_store_intersecting
)
597 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
598 create_store(nir_var_mem_ssbo
, 0, 4, 0x2);
599 create_load(nir_var_mem_ssbo
, 0, 0, 0x3, 32, 2);
601 nir_validate_shader(b
->shader
, NULL
);
602 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
604 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
606 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
609 TEST_F(nir_load_store_vectorize_test
, ssbo_load_identical_store_identical
)
611 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
612 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
613 create_load(nir_var_mem_ssbo
, 0, 0, 0x3);
615 nir_validate_shader(b
->shader
, NULL
);
616 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
618 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
620 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
623 TEST_F(nir_load_store_vectorize_test
, ssbo_store_identical_load_identical
)
625 create_store(nir_var_mem_ssbo
, 0, 0, 0x1);
626 create_load(nir_var_mem_ssbo
, 0, 0, 0x2);
627 create_store(nir_var_mem_ssbo
, 0, 0, 0x3);
629 nir_validate_shader(b
->shader
, NULL
);
630 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
632 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
634 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
/* if nir_opt_load_store_vectorize were implemented like many load/store
 * optimization passes are (for example, nir_opt_combine_stores and
 * nir_opt_copy_prop_vars) and stopped tracking a load when an aliasing store is
 * encountered, this case wouldn't be optimized.
 * A similar test for derefs is shared_load_adjacent_store_identical. */
642 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_store_identical
)
644 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
645 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
646 create_load(nir_var_mem_ssbo
, 0, 4, 0x3);
648 nir_validate_shader(b
->shader
, NULL
);
649 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
650 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
652 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
654 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
655 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
657 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
658 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
659 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
660 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
661 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
662 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
663 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
664 ASSERT_EQ(loads
[0x3]->swizzle
[0], 1);
667 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent
)
669 create_store(nir_var_mem_ssbo
, 0, 0, 0x1);
670 create_store(nir_var_mem_ssbo
, 0, 4, 0x2);
672 nir_validate_shader(b
->shader
, NULL
);
673 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
675 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
677 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
679 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
680 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
681 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x3);
682 nir_ssa_def
*val
= store
->src
[0].ssa
;
683 ASSERT_EQ(val
->bit_size
, 32);
684 ASSERT_EQ(val
->num_components
, 2);
685 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
686 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
687 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
690 TEST_F(nir_load_store_vectorize_test
, ssbo_store_intersecting
)
692 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
693 create_store(nir_var_mem_ssbo
, 0, 4, 0x2, 32, 2);
695 nir_validate_shader(b
->shader
, NULL
);
696 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
698 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
700 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
702 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
703 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
704 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x7);
705 nir_ssa_def
*val
= store
->src
[0].ssa
;
706 ASSERT_EQ(val
->bit_size
, 32);
707 ASSERT_EQ(val
->num_components
, 3);
708 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
709 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
710 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
711 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x21);
714 TEST_F(nir_load_store_vectorize_test
, ssbo_store_identical
)
716 create_store(nir_var_mem_ssbo
, 0, 0, 0x1);
717 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
719 nir_validate_shader(b
->shader
, NULL
);
720 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
722 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
724 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
726 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
727 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
728 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x1);
729 nir_ssa_def
*val
= store
->src
[0].ssa
;
730 ASSERT_EQ(val
->bit_size
, 32);
731 ASSERT_EQ(val
->num_components
, 1);
732 ASSERT_EQ(nir_src_as_uint(store
->src
[0]), 0x20);
735 TEST_F(nir_load_store_vectorize_test
, ssbo_store_large
)
737 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
738 create_store(nir_var_mem_ssbo
, 0, 8, 0x2, 32, 3);
740 nir_validate_shader(b
->shader
, NULL
);
741 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
743 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
745 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
748 TEST_F(nir_load_store_vectorize_test
, ubo_load_adjacent_memory_barrier
)
750 create_load(nir_var_mem_ubo
, 0, 0, 0x1);
751 nir_builder_instr_insert(b
, &nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_memory_barrier
)->instr
);
752 create_load(nir_var_mem_ubo
, 0, 4, 0x2);
754 nir_validate_shader(b
->shader
, NULL
);
755 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
757 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
759 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
762 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_memory_barrier
)
764 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
765 nir_builder_instr_insert(b
, &nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_memory_barrier
)->instr
);
766 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
768 nir_validate_shader(b
->shader
, NULL
);
769 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
771 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
773 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
/* nir_intrinsic_control_barrier only syncs invocations in a workgroup, it
 * doesn't require that loads/stores complete.
 */
779 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_barrier
)
781 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
782 nir_builder_instr_insert(b
, &nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_control_barrier
)->instr
);
783 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
785 nir_validate_shader(b
->shader
, NULL
);
786 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
788 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
790 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
793 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_memory_barrier_shared
)
795 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
796 nir_builder_instr_insert(b
, &nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_memory_barrier_shared
)->instr
);
797 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
799 nir_validate_shader(b
->shader
, NULL
);
800 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
802 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
804 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
807 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_8_8_16
)
809 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 8);
810 create_load(nir_var_mem_ssbo
, 0, 1, 0x2, 8);
811 create_load(nir_var_mem_ssbo
, 0, 2, 0x3, 16);
813 nir_validate_shader(b
->shader
, NULL
);
814 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 3);
816 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
818 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
820 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
821 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 8);
822 ASSERT_EQ(load
->dest
.ssa
.num_components
, 4);
823 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
824 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
825 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
826 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
827 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
829 nir_ssa_def
*val
= loads
[0x3]->src
.ssa
;
830 ASSERT_EQ(val
->bit_size
, 16);
831 ASSERT_EQ(val
->num_components
, 1);
832 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_ior
));
833 nir_ssa_def
*low
= nir_instr_as_alu(val
->parent_instr
)->src
[0].src
.ssa
;
834 nir_ssa_def
*high
= nir_instr_as_alu(val
->parent_instr
)->src
[1].src
.ssa
;
835 ASSERT_TRUE(test_alu(high
->parent_instr
, nir_op_ishl
));
836 high
= nir_instr_as_alu(high
->parent_instr
)->src
[0].src
.ssa
;
837 ASSERT_TRUE(test_alu(low
->parent_instr
, nir_op_u2u16
));
838 ASSERT_TRUE(test_alu(high
->parent_instr
, nir_op_u2u16
));
839 ASSERT_TRUE(test_alu_def(low
->parent_instr
, 0, &load
->dest
.ssa
, 2));
840 ASSERT_TRUE(test_alu_def(high
->parent_instr
, 0, &load
->dest
.ssa
, 3));
843 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_32_32_64
)
845 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
846 create_load(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
848 nir_validate_shader(b
->shader
, NULL
);
849 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
851 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
853 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
855 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
856 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
857 ASSERT_EQ(load
->dest
.ssa
.num_components
, 4);
858 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
859 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
860 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
861 ASSERT_EQ(loads
[0x1]->swizzle
[1], 1);
863 nir_ssa_def
*val
= loads
[0x2]->src
.ssa
;
864 ASSERT_EQ(val
->bit_size
, 64);
865 ASSERT_EQ(val
->num_components
, 1);
866 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_pack_64_2x32
));
867 nir_alu_instr
*pack
= nir_instr_as_alu(val
->parent_instr
);
868 ASSERT_EQ(pack
->src
[0].src
.ssa
, &load
->dest
.ssa
);
869 ASSERT_EQ(pack
->src
[0].swizzle
[0], 2);
870 ASSERT_EQ(pack
->src
[0].swizzle
[1], 3);
873 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_32_32_64_64
)
875 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
876 create_load(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
877 create_load(nir_var_mem_ssbo
, 0, 16, 0x3, 64);
879 nir_validate_shader(b
->shader
, NULL
);
880 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 3);
882 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
, true));
884 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
886 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
887 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 64);
888 ASSERT_EQ(load
->dest
.ssa
.num_components
, 3);
889 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
890 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
891 ASSERT_EQ(loads
[0x3]->swizzle
[0], 2);
893 /* pack_64_2x32(unpack_64_2x32()) is created because the 32-bit and first
894 * 64-bit loads are combined before the second 64-bit load is even considered. */
895 nir_ssa_def
*val
= loads
[0x2]->src
.ssa
;
896 ASSERT_EQ(val
->bit_size
, 64);
897 ASSERT_EQ(val
->num_components
, 1);
898 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_pack_64_2x32
));
899 nir_alu_instr
*pack
= nir_instr_as_alu(val
->parent_instr
);
900 ASSERT_TRUE(test_alu(pack
->src
[0].src
.ssa
->parent_instr
, nir_op_unpack_64_2x32
));
901 nir_alu_instr
*unpack
= nir_instr_as_alu(pack
->src
[0].src
.ssa
->parent_instr
);
902 ASSERT_EQ(unpack
->src
[0].src
.ssa
, &load
->dest
.ssa
);
903 ASSERT_EQ(unpack
->src
[0].swizzle
[0], 1);
905 val
= loads
[0x1]->src
.ssa
;
906 ASSERT_EQ(val
->bit_size
, 32);
907 ASSERT_EQ(val
->num_components
, 2);
908 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_unpack_64_2x32
));
909 unpack
= nir_instr_as_alu(val
->parent_instr
);
910 ASSERT_EQ(unpack
->src
[0].src
.ssa
, &load
->dest
.ssa
);
911 ASSERT_EQ(unpack
->src
[0].swizzle
[0], 0);
914 TEST_F(nir_load_store_vectorize_test
, ssbo_load_intersecting_32_32_64
)
916 create_load(nir_var_mem_ssbo
, 0, 4, 0x1, 32, 2);
917 create_load(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
919 nir_validate_shader(b
->shader
, NULL
);
920 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
922 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
924 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
926 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
927 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
928 ASSERT_EQ(load
->dest
.ssa
.num_components
, 3);
929 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 4);
930 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
931 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
932 ASSERT_EQ(loads
[0x1]->swizzle
[1], 1);
934 nir_ssa_def
*val
= loads
[0x2]->src
.ssa
;
935 ASSERT_EQ(val
->bit_size
, 64);
936 ASSERT_EQ(val
->num_components
, 1);
937 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_pack_64_2x32
));
938 nir_alu_instr
*pack
= nir_instr_as_alu(val
->parent_instr
);
939 ASSERT_EQ(pack
->src
[0].src
.ssa
, &load
->dest
.ssa
);
940 ASSERT_EQ(pack
->src
[0].swizzle
[0], 1);
941 ASSERT_EQ(pack
->src
[0].swizzle
[1], 2);
944 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_8_8_16
)
946 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 8);
947 create_store(nir_var_mem_ssbo
, 0, 1, 0x2, 8);
948 create_store(nir_var_mem_ssbo
, 0, 2, 0x3, 16);
950 nir_validate_shader(b
->shader
, NULL
);
951 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 3);
953 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
955 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
957 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
958 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
959 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0xf);
960 nir_ssa_def
*val
= store
->src
[0].ssa
;
961 ASSERT_EQ(val
->bit_size
, 8);
962 ASSERT_EQ(val
->num_components
, 4);
963 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
964 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
965 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
966 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x30);
967 ASSERT_EQ(nir_const_value_as_uint(cv
[3], 32), 0x0);
970 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_32_32_64
)
972 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
973 create_store(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
975 nir_validate_shader(b
->shader
, NULL
);
976 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
978 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
980 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
982 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
983 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
984 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0xf);
985 nir_ssa_def
*val
= store
->src
[0].ssa
;
986 ASSERT_EQ(val
->bit_size
, 32);
987 ASSERT_EQ(val
->num_components
, 4);
988 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
989 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
990 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x11);
991 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x20);
992 ASSERT_EQ(nir_const_value_as_uint(cv
[3], 32), 0x0);
995 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_32_32_64_64
)
997 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
998 create_store(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
999 create_store(nir_var_mem_ssbo
, 0, 16, 0x3, 64);
1001 nir_validate_shader(b
->shader
, NULL
);
1002 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 3);
1004 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1006 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
1008 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
1009 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
1010 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x7);
1011 nir_ssa_def
*val
= store
->src
[0].ssa
;
1012 ASSERT_EQ(val
->bit_size
, 64);
1013 ASSERT_EQ(val
->num_components
, 3);
1014 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1015 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 64), 0x1100000010ull
);
1016 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 64), 0x20);
1017 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 64), 0x30);
1020 TEST_F(nir_load_store_vectorize_test
, ssbo_store_intersecting_32_32_64
)
1022 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
1023 create_store(nir_var_mem_ssbo
, 0, 4, 0x2, 64);
1025 nir_validate_shader(b
->shader
, NULL
);
1026 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1028 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1030 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
1032 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
1033 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
1034 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x7);
1035 nir_ssa_def
*val
= store
->src
[0].ssa
;
1036 ASSERT_EQ(val
->bit_size
, 32);
1037 ASSERT_EQ(val
->num_components
, 3);
1038 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1039 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
1040 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
1041 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x0);
1044 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_32_64
)
1046 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32);
1047 create_store(nir_var_mem_ssbo
, 0, 4, 0x2, 64, 2);
1049 nir_validate_shader(b
->shader
, NULL
);
1050 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1052 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
1054 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1057 TEST_F(nir_load_store_vectorize_test
, ssbo_store_identical_wrmask
)
1059 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 4, 1 | 4);
1060 create_store(nir_var_mem_ssbo
, 0, 0, 0x2, 32, 4, 2 | 4 | 8);
1062 nir_validate_shader(b
->shader
, NULL
);
1063 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1065 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1067 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
1069 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
1070 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
1071 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0xf);
1072 nir_ssa_def
*val
= store
->src
[0].ssa
;
1073 ASSERT_EQ(val
->bit_size
, 32);
1074 ASSERT_EQ(val
->num_components
, 4);
1075 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1076 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
1077 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x21);
1078 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x22);
1079 ASSERT_EQ(nir_const_value_as_uint(cv
[3], 32), 0x23);
1082 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent
)
1084 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1085 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1087 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1088 create_shared_load(nir_build_deref_array_imm(b
, deref
, 1), 0x2);
1090 nir_validate_shader(b
->shader
, NULL
);
1091 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1093 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1095 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1097 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1098 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1099 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1101 deref
= nir_src_as_deref(load
->src
[0]);
1102 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1104 deref
= nir_deref_instr_parent(deref
);
1105 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1106 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1108 deref
= nir_deref_instr_parent(deref
);
1109 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1110 ASSERT_EQ(deref
->var
, var
);
1112 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1113 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1114 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1115 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1118 TEST_F(nir_load_store_vectorize_test
, shared_load_distant_64bit
)
1120 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1121 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1122 nir_ssa_dest_init(&deref
->instr
, &deref
->dest
, 1, 64, NULL
);
1124 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0x100000000), 0x1);
1125 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0x200000001), 0x2);
1127 nir_validate_shader(b
->shader
, NULL
);
1128 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1130 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared
));
1132 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1135 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent_indirect
)
1137 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1138 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1139 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1141 create_shared_load(nir_build_deref_array(b
, deref
, index_base
), 0x1);
1142 create_shared_load(nir_build_deref_array(b
, deref
, nir_iadd_imm(b
, index_base
, 1)), 0x2);
1144 nir_validate_shader(b
->shader
, NULL
);
1145 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1147 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1149 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1151 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1152 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1153 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1155 deref
= nir_src_as_deref(load
->src
[0]);
1156 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1158 deref
= nir_deref_instr_parent(deref
);
1159 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1160 ASSERT_EQ(deref
->arr
.index
.ssa
, index_base
);
1162 deref
= nir_deref_instr_parent(deref
);
1163 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1164 ASSERT_EQ(deref
->var
, var
);
1166 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1167 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1168 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1169 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1172 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent_indirect_sub
)
1174 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1175 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1176 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1177 nir_ssa_def
*index_base_prev
= nir_iadd_imm(b
, index_base
, 0xffffffff);
1179 create_shared_load(nir_build_deref_array(b
, deref
, index_base_prev
), 0x1);
1180 create_shared_load(nir_build_deref_array(b
, deref
, index_base
), 0x2);
1182 nir_validate_shader(b
->shader
, NULL
);
1183 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1185 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1187 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1189 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1190 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1191 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1193 deref
= nir_src_as_deref(load
->src
[0]);
1194 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1196 deref
= nir_deref_instr_parent(deref
);
1197 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1198 ASSERT_EQ(deref
->arr
.index
.ssa
, index_base_prev
);
1200 deref
= nir_deref_instr_parent(deref
);
1201 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1202 ASSERT_EQ(deref
->var
, var
);
1204 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1205 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1206 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1207 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1210 TEST_F(nir_load_store_vectorize_test
, shared_load_struct
)
1212 glsl_struct_field fields
[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1213 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1215 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_struct_type(fields
, 2, "Struct", false), "var");
1216 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1218 create_shared_load(nir_build_deref_struct(b
, deref
, 0), 0x1);
1219 create_shared_load(nir_build_deref_array_imm(b
, nir_build_deref_struct(b
, deref
, 1), 0), 0x2);
1221 nir_validate_shader(b
->shader
, NULL
);
1222 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1224 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1226 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1228 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1229 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1230 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1232 deref
= nir_src_as_deref(load
->src
[0]);
1233 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1235 deref
= nir_deref_instr_parent(deref
);
1236 ASSERT_EQ(deref
->deref_type
, nir_deref_type_struct
);
1237 ASSERT_EQ(deref
->strct
.index
, 0);
1239 deref
= nir_deref_instr_parent(deref
);
1240 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1241 ASSERT_EQ(deref
->var
, var
);
1243 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1244 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1245 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1246 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1249 TEST_F(nir_load_store_vectorize_test
, shared_load_identical_store_adjacent
)
1251 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1252 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1254 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1255 create_shared_store(nir_build_deref_array_imm(b
, deref
, 1), 0x2);
1256 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x3);
1258 nir_validate_shader(b
->shader
, NULL
);
1259 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1260 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1262 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1264 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1265 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1267 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1268 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1269 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1271 deref
= nir_src_as_deref(load
->src
[0]);
1272 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1273 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1275 deref
= nir_deref_instr_parent(deref
);
1276 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1277 ASSERT_EQ(deref
->var
, var
);
1279 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1280 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1281 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1282 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
1285 TEST_F(nir_load_store_vectorize_test
, shared_load_identical_store_identical
)
1287 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1288 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1290 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1291 create_shared_store(nir_build_deref_array_imm(b
, deref
, 0), 0x2);
1292 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x3);
1294 nir_validate_shader(b
->shader
, NULL
);
1295 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1297 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared
));
1299 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1302 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent_store_identical
)
1304 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1305 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1307 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1308 create_shared_store(nir_build_deref_array_imm(b
, deref
, 0), 0x2);
1309 create_shared_load(nir_build_deref_array_imm(b
, deref
, 1), 0x3);
1311 nir_validate_shader(b
->shader
, NULL
);
1312 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1313 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1315 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1317 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1318 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1320 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1321 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1322 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1324 deref
= nir_src_as_deref(load
->src
[0]);
1325 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1327 deref
= nir_deref_instr_parent(deref
);
1328 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1329 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1331 deref
= nir_deref_instr_parent(deref
);
1332 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1333 ASSERT_EQ(deref
->var
, var
);
1335 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1336 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1337 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1338 ASSERT_EQ(loads
[0x3]->swizzle
[0], 1);
1341 TEST_F(nir_load_store_vectorize_test
, shared_load_bool
)
1343 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1344 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1346 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1, 1);
1347 create_shared_load(nir_build_deref_array_imm(b
, deref
, 1), 0x2, 1);
1349 nir_validate_shader(b
->shader
, NULL
);
1350 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1352 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1354 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1356 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1357 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1358 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1360 deref
= nir_src_as_deref(load
->src
[0]);
1361 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1363 deref
= nir_deref_instr_parent(deref
);
1364 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1365 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1367 deref
= nir_deref_instr_parent(deref
);
1368 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1369 ASSERT_EQ(deref
->var
, var
);
1371 ASSERT_TRUE(test_alu(loads
[0x1]->src
.ssa
->parent_instr
, nir_op_i2b1
));
1372 ASSERT_TRUE(test_alu(loads
[0x2]->src
.ssa
->parent_instr
, nir_op_i2b1
));
1373 ASSERT_TRUE(test_alu_def(loads
[0x1]->src
.ssa
->parent_instr
, 0, &load
->dest
.ssa
, 0));
1374 ASSERT_TRUE(test_alu_def(loads
[0x2]->src
.ssa
->parent_instr
, 0, &load
->dest
.ssa
, 1));
1377 TEST_F(nir_load_store_vectorize_test
, shared_load_bool_mixed
)
1379 glsl_struct_field fields
[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1380 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1382 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_struct_type(fields
, 2, "Struct", false), "var");
1383 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1385 create_shared_load(nir_build_deref_struct(b
, deref
, 0), 0x1, 1);
1386 create_shared_load(nir_build_deref_array_imm(b
, nir_build_deref_struct(b
, deref
, 1), 0), 0x2);
1388 nir_validate_shader(b
->shader
, NULL
);
1389 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1391 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1393 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1395 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1396 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1397 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1399 deref
= nir_src_as_deref(load
->src
[0]);
1400 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1402 deref
= nir_deref_instr_parent(deref
);
1403 ASSERT_EQ(deref
->deref_type
, nir_deref_type_struct
);
1404 ASSERT_EQ(deref
->strct
.index
, 0);
1406 deref
= nir_deref_instr_parent(deref
);
1407 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1408 ASSERT_EQ(deref
->var
, var
);
1410 ASSERT_TRUE(test_alu(loads
[0x1]->src
.ssa
->parent_instr
, nir_op_i2b1
));
1411 ASSERT_TRUE(test_alu_def(loads
[0x1]->src
.ssa
->parent_instr
, 0, &load
->dest
.ssa
, 0));
1412 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1413 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1416 TEST_F(nir_load_store_vectorize_test
, shared_store_adjacent
)
1418 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1419 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1421 create_shared_store(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1422 create_shared_store(nir_build_deref_array_imm(b
, deref
, 1), 0x2);
1424 nir_validate_shader(b
->shader
, NULL
);
1425 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 2);
1427 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1429 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1431 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_deref
, 0);
1432 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x3);
1433 nir_ssa_def
*val
= store
->src
[1].ssa
;
1434 ASSERT_EQ(val
->bit_size
, 32);
1435 ASSERT_EQ(val
->num_components
, 2);
1436 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1437 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
1438 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
1440 deref
= nir_src_as_deref(store
->src
[0]);
1441 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1443 deref
= nir_deref_instr_parent(deref
);
1444 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1445 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1447 deref
= nir_deref_instr_parent(deref
);
1448 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1449 ASSERT_EQ(deref
->var
, var
);
1452 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_base
)
1454 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
1455 nir_intrinsic_set_base(create_load(nir_var_mem_push_const
, 0, 4, 0x2), 4);
1457 nir_validate_shader(b
->shader
, NULL
);
1458 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1460 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1462 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1465 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_direct_direct
)
1467 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
1468 create_load(nir_var_mem_push_const
, 0, 8, 0x2);
1470 nir_validate_shader(b
->shader
, NULL
);
1471 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1473 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1475 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1478 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_direct_indirect
)
1480 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1481 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
1482 create_indirect_load(nir_var_mem_push_const
, 0, index_base
, 0x2);
1484 nir_validate_shader(b
->shader
, NULL
);
1485 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1487 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1489 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1492 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_indirect_indirect
)
1494 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1495 create_indirect_load(nir_var_mem_push_const
, 0,
1496 nir_iadd(b
, nir_imul(b
, nir_iadd(b
, index_base
, nir_imm_int(b
, 2)), nir_imm_int(b
, 16)), nir_imm_int(b
, 32)), 0x1);
1497 create_indirect_load(nir_var_mem_push_const
, 0,
1498 nir_iadd(b
, nir_imul(b
, nir_iadd(b
, index_base
, nir_imm_int(b
, 3)), nir_imm_int(b
, 16)), nir_imm_int(b
, 32)), 0x2);
1500 nir_validate_shader(b
->shader
, NULL
);
1501 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1503 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1505 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1508 TEST_F(nir_load_store_vectorize_test
, push_const_load_adjacent_complex_indirect
)
1510 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1511 //vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
1512 nir_ssa_def
*low
= nir_iadd(b
, nir_imul(b
, index_base
, nir_imm_int(b
, 16)), nir_imm_int(b
, 12));
1513 nir_ssa_def
*high
= nir_imul(b
, nir_iadd(b
, index_base
, nir_imm_int(b
, 1)), nir_imm_int(b
, 16));
1514 create_indirect_load(nir_var_mem_push_const
, 0, low
, 0x1);
1515 create_indirect_load(nir_var_mem_push_const
, 0, high
, 0x2);
1517 nir_validate_shader(b
->shader
, NULL
);
1518 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1520 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const
));
1522 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 1);
1524 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_push_constant
, 0);
1525 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1526 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1527 ASSERT_EQ(load
->src
[0].ssa
, low
);
1528 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1529 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1530 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1531 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1534 TEST_F(nir_load_store_vectorize_test
, ssbo_alias0
)
1536 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1537 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
1538 create_indirect_store(nir_var_mem_ssbo
, 0, index_base
, 0x2);
1539 create_load(nir_var_mem_ssbo
, 0, 0, 0x3);
1541 nir_validate_shader(b
->shader
, NULL
);
1542 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1544 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
1546 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1549 TEST_F(nir_load_store_vectorize_test
, ssbo_alias1
)
1551 nir_ssa_def
*load_base
= nir_load_global_invocation_index(b
, 32);
1552 nir_ssa_def
*store_base
= nir_load_local_invocation_index(b
);
1553 create_indirect_load(nir_var_mem_ssbo
, 0, load_base
, 0x1);
1554 create_indirect_store(nir_var_mem_ssbo
, 0, store_base
, 0x2);
1555 create_indirect_load(nir_var_mem_ssbo
, 0, load_base
, 0x3);
1557 nir_validate_shader(b
->shader
, NULL
);
1558 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1560 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
1562 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1565 TEST_F(nir_load_store_vectorize_test
, DISABLED_ssbo_alias2
)
1567 /* TODO: try to combine these loads */
1568 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1569 nir_ssa_def
*offset
= nir_iadd(b
, nir_imul(b
, index_base
, nir_imm_int(b
, 16)), nir_imm_int(b
, 4));
1570 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x1);
1571 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
1572 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x3);
1574 nir_validate_shader(b
->shader
, NULL
);
1575 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1577 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1579 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
1581 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
1582 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1583 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1584 ASSERT_EQ(load
->src
[1].ssa
, offset
);
1585 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1586 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1587 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1588 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
1591 TEST_F(nir_load_store_vectorize_test
, ssbo_alias3
)
1593 /* these loads can be combined if nir_alu_instr::no_unsigned_wrap is set.
1594 * these loads can't be combined because if index_base == 268435455, then
1595 * offset == 0 because the addition would wrap around */
1596 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1597 nir_ssa_def
*offset
= nir_iadd(b
, nir_imul(b
, index_base
, nir_imm_int(b
, 16)), nir_imm_int(b
, 16));
1598 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x1);
1599 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
1600 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x3);
1602 nir_validate_shader(b
->shader
, NULL
);
1603 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1605 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1607 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
{
   /* TODO: try to combine these loads */
   /* Same shape as ssbo_alias3 (two loads of offset index_base * 16 + 16
    * around a store at constant offset 0), but here the iadd is marked
    * no_unsigned_wrap, so the offset provably cannot wrap to 0 and the store
    * cannot alias the loads.  The test (currently disabled) expects the
    * vectorizer to exploit that and merge the two loads. */
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
   /* The offset SSA def comes straight from the builder's iadd above, so its
    * parent_instr is that ALU instruction. */
   nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   /* Expected result once implemented: a single 32-bit scalar load at the
    * indirect offset, feeding both original load users via component 0. */
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[1].ssa, offset);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
}
TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
{
   /* Two loads of SSBO binding 0 with a store to SSBO binding 1 in between.
    * Without ACCESS_RESTRICT (contrast ssbo_alias6 below), different bindings
    * must be assumed to potentially alias, so nothing may be combined. */
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 1, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   /* run_vectorizer() is expected to report false here — presumably meaning
    * no progress was made; both loads must survive. */
   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}
TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
{
   /* Same shape as ssbo_alias5, but all three accesses carry
    * ACCESS_RESTRICT: the store to binding 1 then cannot alias binding 0,
    * so the two loads of binding 0 can be merged into one. */
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
   create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   /* The surviving load is a single 32-bit scalar at constant offset 0, and
    * both original load users now read component 0 of its result. */
   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
}
TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
{
   /* TODO: implement type-based alias analysis so that these loads can be
    * combined. this is made a bit more difficult than simply using
    * nir_compare_derefs() because the vectorizer creates loads/stores with
    * casted derefs. The solution would probably be to keep multiple derefs for
    * an entry (one for each load/store combined into it). */
   /* Shared-memory struct with two uint[4] fields: the loads index into
    * field0 while the store indexes into field1, so the accesses can never
    * overlap even though both indices are dynamic. */
   glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};

   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   nir_ssa_def *index0 = nir_load_local_invocation_index(b);
   nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32);
   nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);

   create_shared_load(load_deref, 0x1);
   create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
   create_shared_load(load_deref, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   /* Expected result once implemented: the two field0 loads merge into a
    * single load_deref through the original deref chain, with both original
    * users reading component 0. */
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
}
1712 TEST_F(nir_load_store_vectorize_test
, shared_alias1
)
1714 nir_variable
*var0
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_uint_type(), "var0");
1715 nir_variable
*var1
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_uint_type(), "var1");
1716 nir_deref_instr
*load_deref
= nir_build_deref_var(b
, var0
);
1718 create_shared_load(load_deref
, 0x1);
1719 create_shared_store(nir_build_deref_var(b
, var1
), 0x2);
1720 create_shared_load(load_deref
, 0x3);
1722 nir_validate_shader(b
->shader
, NULL
);
1723 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1725 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1727 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1729 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1730 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1731 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1732 ASSERT_EQ(load
->src
[0].ssa
, &load_deref
->dest
.ssa
);
1733 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1734 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1735 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1736 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
1739 TEST_F(nir_load_store_vectorize_test
, ssbo_load_distant_64bit
)
1741 create_indirect_load(nir_var_mem_ssbo
, 0, nir_imm_intN_t(b
, 0x100000000, 64), 0x1);
1742 create_indirect_load(nir_var_mem_ssbo
, 0, nir_imm_intN_t(b
, 0x200000004, 64), 0x2);
1744 nir_validate_shader(b
->shader
, NULL
);
1745 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1747 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1749 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1752 TEST_F(nir_load_store_vectorize_test
, ssbo_load_distant_indirect_64bit
)
1754 nir_ssa_def
*index_base
= nir_u2u64(b
, nir_load_local_invocation_index(b
));
1755 nir_ssa_def
*first
= nir_imul_imm(b
, index_base
, 0x100000000);
1756 nir_ssa_def
*second
= nir_imul_imm(b
, index_base
, 0x200000000);
1757 create_indirect_load(nir_var_mem_ssbo
, 0, first
, 0x1);
1758 create_indirect_load(nir_var_mem_ssbo
, 0, second
, 0x2);
1760 nir_validate_shader(b
->shader
, NULL
);
1761 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1763 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1765 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);