/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include <gtest/gtest.h>

#include <map>

#include "nir_builder.h"
31 class nir_load_store_vectorize_test
: public ::testing::Test
{
33 nir_load_store_vectorize_test();
34 ~nir_load_store_vectorize_test();
36 unsigned count_intrinsics(nir_intrinsic_op intrinsic
);
38 nir_intrinsic_instr
*get_intrinsic(nir_intrinsic_op intrinsic
,
41 bool run_vectorizer(nir_variable_mode modes
, bool cse
=false);
43 nir_ssa_def
*get_resource(uint32_t binding
, bool ssbo
);
45 nir_intrinsic_instr
*create_indirect_load(nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
,
46 uint32_t id
, unsigned bit_size
=32, unsigned components
=1,
48 void create_indirect_store(nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
,
49 uint32_t id
, unsigned bit_size
=32, unsigned components
=1,
50 unsigned wrmask
=0xf, unsigned access
=0);
52 nir_intrinsic_instr
*create_load(nir_variable_mode mode
, uint32_t binding
, uint32_t offset
,
53 uint32_t id
, unsigned bit_size
=32, unsigned components
=1,
55 void create_store(nir_variable_mode mode
, uint32_t binding
, uint32_t offset
,
56 uint32_t id
, unsigned bit_size
=32, unsigned components
=1, unsigned wrmask
=0xf,
59 void create_shared_load(nir_deref_instr
*deref
, uint32_t id
,
60 unsigned bit_size
=32, unsigned components
=1);
61 void create_shared_store(nir_deref_instr
*deref
, uint32_t id
,
62 unsigned bit_size
=32, unsigned components
=1, unsigned wrmask
=0xf);
64 bool test_alu(nir_instr
*instr
, nir_op op
);
65 bool test_alu_def(nir_instr
*instr
, unsigned index
, nir_ssa_def
*def
, unsigned swizzle
=0);
67 static bool mem_vectorize_callback(unsigned align
, unsigned bit_size
,
68 unsigned num_components
, unsigned high_offset
,
69 nir_intrinsic_instr
*low
, nir_intrinsic_instr
*high
);
70 static void shared_type_info(const struct glsl_type
*type
, unsigned *size
, unsigned *align
);
75 std::map
<unsigned, nir_alu_src
*> loads
;
76 std::map
<unsigned, nir_ssa_def
*> res_map
;
79 nir_load_store_vectorize_test::nir_load_store_vectorize_test()
81 glsl_type_singleton_init_or_ref();
83 mem_ctx
= ralloc_context(NULL
);
84 static const nir_shader_compiler_options options
= { };
85 b
= rzalloc(mem_ctx
, nir_builder
);
86 nir_builder_init_simple_shader(b
, mem_ctx
, MESA_SHADER_COMPUTE
, &options
);
89 nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
92 printf("\nShader from the failed test:\n\n");
93 nir_print_shader(b
->shader
, stdout
);
98 glsl_type_singleton_decref();
102 nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic
)
105 nir_foreach_block(block
, b
->impl
) {
106 nir_foreach_instr(instr
, block
) {
107 if (instr
->type
!= nir_instr_type_intrinsic
)
109 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
110 if (intrin
->intrinsic
== intrinsic
)
117 nir_intrinsic_instr
*
118 nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic
,
121 nir_foreach_block(block
, b
->impl
) {
122 nir_foreach_instr(instr
, block
) {
123 if (instr
->type
!= nir_instr_type_intrinsic
)
125 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
126 if (intrin
->intrinsic
== intrinsic
) {
137 nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes
, bool cse
)
139 if (modes
& nir_var_mem_shared
)
140 nir_lower_vars_to_explicit_types(b
->shader
, nir_var_mem_shared
, shared_type_info
);
141 bool progress
= nir_opt_load_store_vectorize(b
->shader
, modes
, mem_vectorize_callback
);
143 nir_validate_shader(b
->shader
, NULL
);
145 nir_opt_cse(b
->shader
);
146 nir_copy_prop(b
->shader
);
147 nir_opt_algebraic(b
->shader
);
148 nir_opt_constant_folding(b
->shader
);
154 nir_load_store_vectorize_test::get_resource(uint32_t binding
, bool ssbo
)
156 if (res_map
.count(binding
))
157 return res_map
[binding
];
159 nir_intrinsic_instr
*res
= nir_intrinsic_instr_create(
160 b
->shader
, nir_intrinsic_vulkan_resource_index
);
161 nir_ssa_dest_init(&res
->instr
, &res
->dest
, 1, 32, NULL
);
162 res
->num_components
= 1;
163 res
->src
[0] = nir_src_for_ssa(nir_imm_zero(b
, 1, 32));
164 nir_intrinsic_set_desc_type(
165 res
, ssbo
? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
166 nir_intrinsic_set_desc_set(res
, 0);
167 nir_intrinsic_set_binding(res
, binding
);
168 nir_builder_instr_insert(b
, &res
->instr
);
169 res_map
[binding
] = &res
->dest
.ssa
;
170 return &res
->dest
.ssa
;
173 nir_intrinsic_instr
*
174 nir_load_store_vectorize_test::create_indirect_load(
175 nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
, uint32_t id
,
176 unsigned bit_size
, unsigned components
, unsigned access
)
178 nir_intrinsic_op intrinsic
;
179 nir_ssa_def
*res
= NULL
;
181 case nir_var_mem_ubo
:
182 intrinsic
= nir_intrinsic_load_ubo
;
183 res
= get_resource(binding
, false);
185 case nir_var_mem_ssbo
:
186 intrinsic
= nir_intrinsic_load_ssbo
;
187 res
= get_resource(binding
, true);
189 case nir_var_mem_push_const
:
190 intrinsic
= nir_intrinsic_load_push_constant
;
195 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
->shader
, intrinsic
);
196 nir_ssa_dest_init(&load
->instr
, &load
->dest
, components
, bit_size
, NULL
);
197 load
->num_components
= components
;
199 load
->src
[0] = nir_src_for_ssa(res
);
200 load
->src
[1] = nir_src_for_ssa(offset
);
202 load
->src
[0] = nir_src_for_ssa(offset
);
204 if (mode
!= nir_var_mem_push_const
) {
205 nir_intrinsic_set_align(load
, (bit_size
== 1 ? 32 : bit_size
) / 8, 0);
206 nir_intrinsic_set_access(load
, (gl_access_qualifier
)access
);
208 nir_builder_instr_insert(b
, &load
->instr
);
209 nir_instr
*mov
= nir_mov(b
, &load
->dest
.ssa
)->parent_instr
;
210 loads
[id
] = &nir_instr_as_alu(mov
)->src
[0];
216 nir_load_store_vectorize_test::create_indirect_store(
217 nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
, uint32_t id
,
218 unsigned bit_size
, unsigned components
, unsigned wrmask
, unsigned access
)
220 nir_const_value values
[NIR_MAX_VEC_COMPONENTS
];
221 for (unsigned i
= 0; i
< components
; i
++)
222 values
[i
] = nir_const_value_for_raw_uint((id
<< 4) | i
, bit_size
);
223 nir_ssa_def
*value
= nir_build_imm(b
, components
, bit_size
, values
);
225 nir_intrinsic_op intrinsic
;
226 nir_ssa_def
*res
= NULL
;
228 case nir_var_mem_ssbo
:
229 intrinsic
= nir_intrinsic_store_ssbo
;
230 res
= get_resource(binding
, true);
232 case nir_var_mem_shared
:
233 intrinsic
= nir_intrinsic_store_shared
;
238 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
->shader
, intrinsic
);
239 nir_ssa_dest_init(&store
->instr
, &store
->dest
, components
, bit_size
, NULL
);
240 store
->num_components
= components
;
242 store
->src
[0] = nir_src_for_ssa(value
);
243 store
->src
[1] = nir_src_for_ssa(res
);
244 store
->src
[2] = nir_src_for_ssa(offset
);
246 store
->src
[0] = nir_src_for_ssa(value
);
247 store
->src
[1] = nir_src_for_ssa(offset
);
249 nir_intrinsic_set_align(store
, (bit_size
== 1 ? 32 : bit_size
) / 8, 0);
250 nir_intrinsic_set_access(store
, (gl_access_qualifier
)access
);
251 nir_intrinsic_set_write_mask(store
, wrmask
& ((1 << components
) - 1));
252 nir_builder_instr_insert(b
, &store
->instr
);
255 nir_intrinsic_instr
*
256 nir_load_store_vectorize_test::create_load(
257 nir_variable_mode mode
, uint32_t binding
, uint32_t offset
, uint32_t id
,
258 unsigned bit_size
, unsigned components
, unsigned access
)
260 return create_indirect_load(mode
, binding
, nir_imm_int(b
, offset
), id
, bit_size
, components
, access
);
264 nir_load_store_vectorize_test::create_store(
265 nir_variable_mode mode
, uint32_t binding
, uint32_t offset
, uint32_t id
,
266 unsigned bit_size
, unsigned components
, unsigned wrmask
, unsigned access
)
268 create_indirect_store(mode
, binding
, nir_imm_int(b
, offset
), id
, bit_size
, components
, wrmask
, access
);
271 void nir_load_store_vectorize_test::create_shared_load(
272 nir_deref_instr
*deref
, uint32_t id
, unsigned bit_size
, unsigned components
)
274 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_deref
);
275 nir_ssa_dest_init(&load
->instr
, &load
->dest
, components
, bit_size
, NULL
);
276 load
->num_components
= components
;
277 load
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
278 nir_builder_instr_insert(b
, &load
->instr
);
279 nir_instr
*mov
= nir_mov(b
, &load
->dest
.ssa
)->parent_instr
;
280 loads
[id
] = &nir_instr_as_alu(mov
)->src
[0];
283 void nir_load_store_vectorize_test::create_shared_store(
284 nir_deref_instr
*deref
, uint32_t id
,
285 unsigned bit_size
, unsigned components
, unsigned wrmask
)
287 nir_const_value values
[NIR_MAX_VEC_COMPONENTS
];
288 for (unsigned i
= 0; i
< components
; i
++)
289 values
[i
] = nir_const_value_for_raw_uint((id
<< 4) | i
, bit_size
);
290 nir_ssa_def
*value
= nir_build_imm(b
, components
, bit_size
, values
);
292 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_deref
);
293 nir_ssa_dest_init(&store
->instr
, &store
->dest
, components
, bit_size
, NULL
);
294 store
->num_components
= components
;
295 store
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
296 store
->src
[1] = nir_src_for_ssa(value
);
297 nir_intrinsic_set_write_mask(store
, wrmask
& ((1 << components
) - 1));
298 nir_builder_instr_insert(b
, &store
->instr
);
301 bool nir_load_store_vectorize_test::test_alu(nir_instr
*instr
, nir_op op
)
303 return instr
->type
== nir_instr_type_alu
&& nir_instr_as_alu(instr
)->op
== op
;
306 bool nir_load_store_vectorize_test::test_alu_def(
307 nir_instr
*instr
, unsigned index
, nir_ssa_def
*def
, unsigned swizzle
)
309 if (instr
->type
!= nir_instr_type_alu
)
312 nir_alu_instr
*alu
= nir_instr_as_alu(instr
);
314 if (index
>= nir_op_infos
[alu
->op
].num_inputs
)
316 if (alu
->src
[index
].src
.ssa
!= def
)
318 if (alu
->src
[index
].swizzle
[0] != swizzle
)
324 bool nir_load_store_vectorize_test::mem_vectorize_callback(
325 unsigned align
, unsigned bit_size
, unsigned num_components
, unsigned high_offset
,
326 nir_intrinsic_instr
*low
, nir_intrinsic_instr
*high
)
331 void nir_load_store_vectorize_test::shared_type_info(
332 const struct glsl_type
*type
, unsigned *size
, unsigned *align
)
334 assert(glsl_type_is_vector_or_scalar(type
));
336 uint32_t comp_size
= glsl_type_is_boolean(type
)
337 ? 4 : glsl_get_bit_size(type
) / 8;
338 unsigned length
= glsl_get_vector_elements(type
);
339 *size
= comp_size
* length
,
344 TEST_F(nir_load_store_vectorize_test
, ubo_load_adjacent
)
346 create_load(nir_var_mem_ubo
, 0, 0, 0x1);
347 create_load(nir_var_mem_ubo
, 0, 4, 0x2);
349 nir_validate_shader(b
->shader
, NULL
);
350 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
352 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
354 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
356 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ubo
, 0);
357 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
358 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
359 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
360 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
361 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
362 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
363 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
366 TEST_F(nir_load_store_vectorize_test
, ubo_load_intersecting
)
368 create_load(nir_var_mem_ubo
, 0, 0, 0x1, 32, 2);
369 create_load(nir_var_mem_ubo
, 0, 4, 0x2, 32, 2);
371 nir_validate_shader(b
->shader
, NULL
);
372 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
374 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
376 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
378 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ubo
, 0);
379 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
380 ASSERT_EQ(load
->dest
.ssa
.num_components
, 3);
381 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
382 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
383 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
384 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
385 ASSERT_EQ(loads
[0x1]->swizzle
[1], 1);
386 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
387 ASSERT_EQ(loads
[0x2]->swizzle
[1], 2);
390 TEST_F(nir_load_store_vectorize_test
, ubo_load_identical
)
392 create_load(nir_var_mem_ubo
, 0, 0, 0x1);
393 create_load(nir_var_mem_ubo
, 0, 0, 0x2);
395 nir_validate_shader(b
->shader
, NULL
);
396 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
398 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
400 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
402 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ubo
, 0);
403 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
404 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
405 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
406 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
407 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
408 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
409 ASSERT_EQ(loads
[0x2]->swizzle
[0], 0);
412 TEST_F(nir_load_store_vectorize_test
, ubo_load_large
)
414 create_load(nir_var_mem_ubo
, 0, 0, 0x1, 32, 2);
415 create_load(nir_var_mem_ubo
, 0, 8, 0x2, 32, 3);
417 nir_validate_shader(b
->shader
, NULL
);
418 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
420 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
422 nir_validate_shader(b
->shader
, NULL
);
423 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
426 TEST_F(nir_load_store_vectorize_test
, push_const_load_adjacent
)
428 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
429 create_load(nir_var_mem_push_const
, 0, 4, 0x2);
431 nir_validate_shader(b
->shader
, NULL
);
432 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
434 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const
));
436 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 1);
438 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_push_constant
, 0);
439 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
440 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
441 ASSERT_EQ(nir_src_as_uint(load
->src
[0]), 0);
442 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
443 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
444 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
445 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
448 TEST_F(nir_load_store_vectorize_test
, push_const_load_adjacent_base
)
450 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
451 nir_intrinsic_set_base(create_load(nir_var_mem_push_const
, 0, 0, 0x2), 4);
453 nir_validate_shader(b
->shader
, NULL
);
454 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
456 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const
));
458 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 1);
460 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_push_constant
, 0);
461 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
462 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
463 ASSERT_EQ(nir_src_as_uint(load
->src
[0]), 0);
464 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
465 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
466 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
467 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
470 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent
)
472 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
473 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
475 nir_validate_shader(b
->shader
, NULL
);
476 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
478 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
480 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
482 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
483 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
484 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
485 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
486 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
487 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
488 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
489 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
492 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_indirect
)
494 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
495 create_indirect_load(nir_var_mem_ssbo
, 0, index_base
, 0x1);
496 create_indirect_load(nir_var_mem_ssbo
, 0, nir_iadd_imm(b
, index_base
, 4), 0x2);
498 nir_validate_shader(b
->shader
, NULL
);
499 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
501 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
503 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
505 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
506 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
507 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
508 ASSERT_EQ(load
->src
[1].ssa
, index_base
);
509 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
510 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
511 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
512 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
515 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_indirect_sub
)
517 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
518 nir_ssa_def
*index_base_prev
= nir_iadd_imm(b
, index_base
, 0xfffffffc);
519 create_indirect_load(nir_var_mem_ssbo
, 0, index_base_prev
, 0x1);
520 create_indirect_load(nir_var_mem_ssbo
, 0, index_base
, 0x2);
522 nir_validate_shader(b
->shader
, NULL
);
523 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
525 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
527 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
529 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
530 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
531 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
532 ASSERT_EQ(load
->src
[1].ssa
, index_base_prev
);
533 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
534 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
535 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
536 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
539 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_indirect_neg_stride
)
541 nir_ssa_def
*inv
= nir_load_local_invocation_index(b
);
542 nir_ssa_def
*inv_plus_one
= nir_iadd_imm(b
, inv
, 1);
543 nir_ssa_def
*index_base
= nir_imul_imm(b
, inv
, 0xfffffffc);
544 nir_ssa_def
*index_base_prev
= nir_imul_imm(b
, inv_plus_one
, 0xfffffffc);
545 create_indirect_load(nir_var_mem_ssbo
, 0, index_base_prev
, 0x1);
546 create_indirect_load(nir_var_mem_ssbo
, 0, index_base
, 0x2);
548 nir_validate_shader(b
->shader
, NULL
);
549 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
551 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
553 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
555 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
556 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
557 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
558 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
559 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
560 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
561 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
563 /* nir_opt_algebraic optimizes the imul */
564 ASSERT_TRUE(test_alu(load
->src
[1].ssa
->parent_instr
, nir_op_ineg
));
565 nir_ssa_def
*offset
= nir_instr_as_alu(load
->src
[1].ssa
->parent_instr
)->src
[0].src
.ssa
;
566 ASSERT_TRUE(test_alu(offset
->parent_instr
, nir_op_ishl
));
567 nir_alu_instr
*shl
= nir_instr_as_alu(offset
->parent_instr
);
568 ASSERT_EQ(shl
->src
[0].src
.ssa
, inv_plus_one
);
569 ASSERT_EQ(nir_src_as_uint(shl
->src
[1].src
), 2);
572 TEST_F(nir_load_store_vectorize_test
, ssbo_load_identical_store_adjacent
)
574 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
575 create_store(nir_var_mem_ssbo
, 0, 4, 0x2);
576 create_load(nir_var_mem_ssbo
, 0, 0, 0x3);
578 nir_validate_shader(b
->shader
, NULL
);
579 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
581 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
583 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
585 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
586 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
587 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
588 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
589 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
590 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
591 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
592 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
595 TEST_F(nir_load_store_vectorize_test
, ssbo_load_identical_store_intersecting
)
597 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
598 create_store(nir_var_mem_ssbo
, 0, 4, 0x2);
599 create_load(nir_var_mem_ssbo
, 0, 0, 0x3, 32, 2);
601 nir_validate_shader(b
->shader
, NULL
);
602 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
604 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
606 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
609 TEST_F(nir_load_store_vectorize_test
, ssbo_load_identical_store_identical
)
611 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
612 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
613 create_load(nir_var_mem_ssbo
, 0, 0, 0x3);
615 nir_validate_shader(b
->shader
, NULL
);
616 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
618 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
620 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
623 TEST_F(nir_load_store_vectorize_test
, ssbo_store_identical_load_identical
)
625 create_store(nir_var_mem_ssbo
, 0, 0, 0x1);
626 create_load(nir_var_mem_ssbo
, 0, 0, 0x2);
627 create_store(nir_var_mem_ssbo
, 0, 0, 0x3);
629 nir_validate_shader(b
->shader
, NULL
);
630 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
632 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
634 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
637 /* if nir_opt_load_store_vectorize were implemented like many load/store
638 * optimization passes are (for example, nir_opt_combine_stores and
639 * nir_opt_copy_prop_vars) and stopped tracking a load when an aliasing store is
640 * encountered, this case wouldn't be optimized.
641 * A similar test for derefs is shared_load_adjacent_store_identical. */
642 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_store_identical
)
644 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
645 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
646 create_load(nir_var_mem_ssbo
, 0, 4, 0x3);
648 nir_validate_shader(b
->shader
, NULL
);
649 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
650 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
652 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
654 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
655 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
657 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
658 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
659 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
660 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
661 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
662 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
663 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
664 ASSERT_EQ(loads
[0x3]->swizzle
[0], 1);
667 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent
)
669 create_store(nir_var_mem_ssbo
, 0, 0, 0x1);
670 create_store(nir_var_mem_ssbo
, 0, 4, 0x2);
672 nir_validate_shader(b
->shader
, NULL
);
673 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
675 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
677 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
679 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
680 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
681 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x3);
682 nir_ssa_def
*val
= store
->src
[0].ssa
;
683 ASSERT_EQ(val
->bit_size
, 32);
684 ASSERT_EQ(val
->num_components
, 2);
685 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
686 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
687 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
690 TEST_F(nir_load_store_vectorize_test
, ssbo_store_intersecting
)
692 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
693 create_store(nir_var_mem_ssbo
, 0, 4, 0x2, 32, 2);
695 nir_validate_shader(b
->shader
, NULL
);
696 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
698 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
700 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
702 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
703 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
704 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x7);
705 nir_ssa_def
*val
= store
->src
[0].ssa
;
706 ASSERT_EQ(val
->bit_size
, 32);
707 ASSERT_EQ(val
->num_components
, 3);
708 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
709 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
710 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
711 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x21);
714 TEST_F(nir_load_store_vectorize_test
, ssbo_store_identical
)
716 create_store(nir_var_mem_ssbo
, 0, 0, 0x1);
717 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
719 nir_validate_shader(b
->shader
, NULL
);
720 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
722 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
724 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
726 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
727 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
728 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x1);
729 nir_ssa_def
*val
= store
->src
[0].ssa
;
730 ASSERT_EQ(val
->bit_size
, 32);
731 ASSERT_EQ(val
->num_components
, 1);
732 ASSERT_EQ(nir_src_as_uint(store
->src
[0]), 0x20);
735 TEST_F(nir_load_store_vectorize_test
, ssbo_store_large
)
737 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
738 create_store(nir_var_mem_ssbo
, 0, 8, 0x2, 32, 3);
740 nir_validate_shader(b
->shader
, NULL
);
741 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
743 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
745 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
748 TEST_F(nir_load_store_vectorize_test
, ubo_load_adjacent_memory_barrier
)
750 create_load(nir_var_mem_ubo
, 0, 0, 0x1);
752 nir_scoped_memory_barrier(b
, NIR_SCOPE_DEVICE
, NIR_MEMORY_ACQ_REL
,
755 create_load(nir_var_mem_ubo
, 0, 4, 0x2);
757 nir_validate_shader(b
->shader
, NULL
);
758 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
760 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
762 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
765 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_memory_barrier
)
767 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
769 nir_scoped_memory_barrier(b
, NIR_SCOPE_DEVICE
, NIR_MEMORY_ACQ_REL
,
772 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
774 nir_validate_shader(b
->shader
, NULL
);
775 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
777 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
779 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
/* nir_intrinsic_control_barrier only syncs invocations in a workgroup, it
 * doesn't require that loads/stores complete.
 */
785 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_barrier
)
787 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
788 nir_builder_instr_insert(b
, &nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_control_barrier
)->instr
);
789 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
791 nir_validate_shader(b
->shader
, NULL
);
792 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
794 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
796 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
799 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_memory_barrier_shared
)
801 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
803 nir_scoped_memory_barrier(b
, NIR_SCOPE_WORKGROUP
, NIR_MEMORY_ACQ_REL
,
806 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
808 nir_validate_shader(b
->shader
, NULL
);
809 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
811 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
813 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
816 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_8_8_16
)
818 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 8);
819 create_load(nir_var_mem_ssbo
, 0, 1, 0x2, 8);
820 create_load(nir_var_mem_ssbo
, 0, 2, 0x3, 16);
822 nir_validate_shader(b
->shader
, NULL
);
823 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 3);
825 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
827 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
829 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
830 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 8);
831 ASSERT_EQ(load
->dest
.ssa
.num_components
, 4);
832 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
833 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
834 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
835 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
836 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
838 nir_ssa_def
*val
= loads
[0x3]->src
.ssa
;
839 ASSERT_EQ(val
->bit_size
, 16);
840 ASSERT_EQ(val
->num_components
, 1);
841 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_ior
));
842 nir_ssa_def
*low
= nir_instr_as_alu(val
->parent_instr
)->src
[0].src
.ssa
;
843 nir_ssa_def
*high
= nir_instr_as_alu(val
->parent_instr
)->src
[1].src
.ssa
;
844 ASSERT_TRUE(test_alu(high
->parent_instr
, nir_op_ishl
));
845 high
= nir_instr_as_alu(high
->parent_instr
)->src
[0].src
.ssa
;
846 ASSERT_TRUE(test_alu(low
->parent_instr
, nir_op_u2u16
));
847 ASSERT_TRUE(test_alu(high
->parent_instr
, nir_op_u2u16
));
848 ASSERT_TRUE(test_alu_def(low
->parent_instr
, 0, &load
->dest
.ssa
, 2));
849 ASSERT_TRUE(test_alu_def(high
->parent_instr
, 0, &load
->dest
.ssa
, 3));
852 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_32_32_64
)
854 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
855 create_load(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
857 nir_validate_shader(b
->shader
, NULL
);
858 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
860 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
862 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
864 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
865 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
866 ASSERT_EQ(load
->dest
.ssa
.num_components
, 4);
867 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
868 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
869 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
870 ASSERT_EQ(loads
[0x1]->swizzle
[1], 1);
872 nir_ssa_def
*val
= loads
[0x2]->src
.ssa
;
873 ASSERT_EQ(val
->bit_size
, 64);
874 ASSERT_EQ(val
->num_components
, 1);
875 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_pack_64_2x32
));
876 nir_alu_instr
*pack
= nir_instr_as_alu(val
->parent_instr
);
877 ASSERT_EQ(pack
->src
[0].src
.ssa
, &load
->dest
.ssa
);
878 ASSERT_EQ(pack
->src
[0].swizzle
[0], 2);
879 ASSERT_EQ(pack
->src
[0].swizzle
[1], 3);
882 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_32_32_64_64
)
884 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
885 create_load(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
886 create_load(nir_var_mem_ssbo
, 0, 16, 0x3, 64);
888 nir_validate_shader(b
->shader
, NULL
);
889 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 3);
891 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
, true));
893 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
895 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
896 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 64);
897 ASSERT_EQ(load
->dest
.ssa
.num_components
, 3);
898 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
899 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
900 ASSERT_EQ(loads
[0x3]->swizzle
[0], 2);
902 /* pack_64_2x32(unpack_64_2x32()) is created because the 32-bit and first
903 * 64-bit loads are combined before the second 64-bit load is even considered. */
904 nir_ssa_def
*val
= loads
[0x2]->src
.ssa
;
905 ASSERT_EQ(val
->bit_size
, 64);
906 ASSERT_EQ(val
->num_components
, 1);
907 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_pack_64_2x32
));
908 nir_alu_instr
*pack
= nir_instr_as_alu(val
->parent_instr
);
909 ASSERT_TRUE(test_alu(pack
->src
[0].src
.ssa
->parent_instr
, nir_op_unpack_64_2x32
));
910 nir_alu_instr
*unpack
= nir_instr_as_alu(pack
->src
[0].src
.ssa
->parent_instr
);
911 ASSERT_EQ(unpack
->src
[0].src
.ssa
, &load
->dest
.ssa
);
912 ASSERT_EQ(unpack
->src
[0].swizzle
[0], 1);
914 val
= loads
[0x1]->src
.ssa
;
915 ASSERT_EQ(val
->bit_size
, 32);
916 ASSERT_EQ(val
->num_components
, 2);
917 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_unpack_64_2x32
));
918 unpack
= nir_instr_as_alu(val
->parent_instr
);
919 ASSERT_EQ(unpack
->src
[0].src
.ssa
, &load
->dest
.ssa
);
920 ASSERT_EQ(unpack
->src
[0].swizzle
[0], 0);
923 TEST_F(nir_load_store_vectorize_test
, ssbo_load_intersecting_32_32_64
)
925 create_load(nir_var_mem_ssbo
, 0, 4, 0x1, 32, 2);
926 create_load(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
928 nir_validate_shader(b
->shader
, NULL
);
929 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
931 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
933 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
935 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
936 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
937 ASSERT_EQ(load
->dest
.ssa
.num_components
, 3);
938 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 4);
939 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
940 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
941 ASSERT_EQ(loads
[0x1]->swizzle
[1], 1);
943 nir_ssa_def
*val
= loads
[0x2]->src
.ssa
;
944 ASSERT_EQ(val
->bit_size
, 64);
945 ASSERT_EQ(val
->num_components
, 1);
946 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_pack_64_2x32
));
947 nir_alu_instr
*pack
= nir_instr_as_alu(val
->parent_instr
);
948 ASSERT_EQ(pack
->src
[0].src
.ssa
, &load
->dest
.ssa
);
949 ASSERT_EQ(pack
->src
[0].swizzle
[0], 1);
950 ASSERT_EQ(pack
->src
[0].swizzle
[1], 2);
953 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_8_8_16
)
955 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 8);
956 create_store(nir_var_mem_ssbo
, 0, 1, 0x2, 8);
957 create_store(nir_var_mem_ssbo
, 0, 2, 0x3, 16);
959 nir_validate_shader(b
->shader
, NULL
);
960 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 3);
962 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
964 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
966 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
967 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
968 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0xf);
969 nir_ssa_def
*val
= store
->src
[0].ssa
;
970 ASSERT_EQ(val
->bit_size
, 8);
971 ASSERT_EQ(val
->num_components
, 4);
972 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
973 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 8), 0x10);
974 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 8), 0x20);
975 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 8), 0x30);
976 ASSERT_EQ(nir_const_value_as_uint(cv
[3], 8), 0x0);
979 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_32_32_64
)
981 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
982 create_store(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
984 nir_validate_shader(b
->shader
, NULL
);
985 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
987 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
989 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
991 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
992 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
993 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0xf);
994 nir_ssa_def
*val
= store
->src
[0].ssa
;
995 ASSERT_EQ(val
->bit_size
, 32);
996 ASSERT_EQ(val
->num_components
, 4);
997 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
998 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
999 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x11);
1000 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x20);
1001 ASSERT_EQ(nir_const_value_as_uint(cv
[3], 32), 0x0);
1004 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_32_32_64_64
)
1006 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
1007 create_store(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
1008 create_store(nir_var_mem_ssbo
, 0, 16, 0x3, 64);
1010 nir_validate_shader(b
->shader
, NULL
);
1011 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 3);
1013 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1015 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
1017 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
1018 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
1019 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x7);
1020 nir_ssa_def
*val
= store
->src
[0].ssa
;
1021 ASSERT_EQ(val
->bit_size
, 64);
1022 ASSERT_EQ(val
->num_components
, 3);
1023 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1024 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 64), 0x1100000010ull
);
1025 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 64), 0x20);
1026 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 64), 0x30);
1029 TEST_F(nir_load_store_vectorize_test
, ssbo_store_intersecting_32_32_64
)
1031 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
1032 create_store(nir_var_mem_ssbo
, 0, 4, 0x2, 64);
1034 nir_validate_shader(b
->shader
, NULL
);
1035 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1037 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1039 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
1041 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
1042 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
1043 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x7);
1044 nir_ssa_def
*val
= store
->src
[0].ssa
;
1045 ASSERT_EQ(val
->bit_size
, 32);
1046 ASSERT_EQ(val
->num_components
, 3);
1047 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1048 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
1049 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
1050 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x0);
1053 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_32_64
)
1055 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32);
1056 create_store(nir_var_mem_ssbo
, 0, 4, 0x2, 64, 2);
1058 nir_validate_shader(b
->shader
, NULL
);
1059 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1061 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
1063 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1066 TEST_F(nir_load_store_vectorize_test
, ssbo_store_identical_wrmask
)
1068 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 4, 1 | 4);
1069 create_store(nir_var_mem_ssbo
, 0, 0, 0x2, 32, 4, 2 | 4 | 8);
1071 nir_validate_shader(b
->shader
, NULL
);
1072 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1074 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1076 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
1078 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
1079 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
1080 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0xf);
1081 nir_ssa_def
*val
= store
->src
[0].ssa
;
1082 ASSERT_EQ(val
->bit_size
, 32);
1083 ASSERT_EQ(val
->num_components
, 4);
1084 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1085 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
1086 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x21);
1087 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x22);
1088 ASSERT_EQ(nir_const_value_as_uint(cv
[3], 32), 0x23);
1091 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent
)
1093 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1094 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1096 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1097 create_shared_load(nir_build_deref_array_imm(b
, deref
, 1), 0x2);
1099 nir_validate_shader(b
->shader
, NULL
);
1100 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1102 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1104 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1106 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1107 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1108 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1110 deref
= nir_src_as_deref(load
->src
[0]);
1111 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1113 deref
= nir_deref_instr_parent(deref
);
1114 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1115 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1117 deref
= nir_deref_instr_parent(deref
);
1118 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1119 ASSERT_EQ(deref
->var
, var
);
1121 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1122 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1123 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1124 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1127 TEST_F(nir_load_store_vectorize_test
, shared_load_distant_64bit
)
1129 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1130 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1131 nir_ssa_dest_init(&deref
->instr
, &deref
->dest
, 1, 64, NULL
);
1133 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0x100000000), 0x1);
1134 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0x200000001), 0x2);
1136 nir_validate_shader(b
->shader
, NULL
);
1137 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1139 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared
));
1141 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1144 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent_indirect
)
1146 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1147 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1148 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1150 create_shared_load(nir_build_deref_array(b
, deref
, index_base
), 0x1);
1151 create_shared_load(nir_build_deref_array(b
, deref
, nir_iadd_imm(b
, index_base
, 1)), 0x2);
1153 nir_validate_shader(b
->shader
, NULL
);
1154 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1156 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1158 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1160 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1161 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1162 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1164 deref
= nir_src_as_deref(load
->src
[0]);
1165 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1167 deref
= nir_deref_instr_parent(deref
);
1168 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1169 ASSERT_EQ(deref
->arr
.index
.ssa
, index_base
);
1171 deref
= nir_deref_instr_parent(deref
);
1172 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1173 ASSERT_EQ(deref
->var
, var
);
1175 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1176 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1177 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1178 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1181 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent_indirect_sub
)
1183 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1184 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1185 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1186 nir_ssa_def
*index_base_prev
= nir_iadd_imm(b
, index_base
, 0xffffffff);
1188 create_shared_load(nir_build_deref_array(b
, deref
, index_base_prev
), 0x1);
1189 create_shared_load(nir_build_deref_array(b
, deref
, index_base
), 0x2);
1191 nir_validate_shader(b
->shader
, NULL
);
1192 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1194 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1196 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1198 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1199 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1200 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1202 deref
= nir_src_as_deref(load
->src
[0]);
1203 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1205 deref
= nir_deref_instr_parent(deref
);
1206 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1207 ASSERT_EQ(deref
->arr
.index
.ssa
, index_base_prev
);
1209 deref
= nir_deref_instr_parent(deref
);
1210 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1211 ASSERT_EQ(deref
->var
, var
);
1213 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1214 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1215 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1216 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1219 TEST_F(nir_load_store_vectorize_test
, shared_load_struct
)
1221 glsl_struct_field fields
[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1222 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1224 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_struct_type(fields
, 2, "Struct", false), "var");
1225 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1227 create_shared_load(nir_build_deref_struct(b
, deref
, 0), 0x1);
1228 create_shared_load(nir_build_deref_array_imm(b
, nir_build_deref_struct(b
, deref
, 1), 0), 0x2);
1230 nir_validate_shader(b
->shader
, NULL
);
1231 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1233 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1235 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1237 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1238 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1239 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1241 deref
= nir_src_as_deref(load
->src
[0]);
1242 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1244 deref
= nir_deref_instr_parent(deref
);
1245 ASSERT_EQ(deref
->deref_type
, nir_deref_type_struct
);
1246 ASSERT_EQ(deref
->strct
.index
, 0);
1248 deref
= nir_deref_instr_parent(deref
);
1249 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1250 ASSERT_EQ(deref
->var
, var
);
1252 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1253 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1254 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1255 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1258 TEST_F(nir_load_store_vectorize_test
, shared_load_identical_store_adjacent
)
1260 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1261 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1263 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1264 create_shared_store(nir_build_deref_array_imm(b
, deref
, 1), 0x2);
1265 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x3);
1267 nir_validate_shader(b
->shader
, NULL
);
1268 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1269 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1271 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1273 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1274 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1276 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1277 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1278 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1280 deref
= nir_src_as_deref(load
->src
[0]);
1281 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1282 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1284 deref
= nir_deref_instr_parent(deref
);
1285 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1286 ASSERT_EQ(deref
->var
, var
);
1288 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1289 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1290 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1291 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
1294 TEST_F(nir_load_store_vectorize_test
, shared_load_identical_store_identical
)
1296 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1297 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1299 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1300 create_shared_store(nir_build_deref_array_imm(b
, deref
, 0), 0x2);
1301 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x3);
1303 nir_validate_shader(b
->shader
, NULL
);
1304 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1306 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared
));
1308 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1311 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent_store_identical
)
1313 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1314 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1316 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1317 create_shared_store(nir_build_deref_array_imm(b
, deref
, 0), 0x2);
1318 create_shared_load(nir_build_deref_array_imm(b
, deref
, 1), 0x3);
1320 nir_validate_shader(b
->shader
, NULL
);
1321 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1322 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1324 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1326 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1327 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1329 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1330 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1331 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1333 deref
= nir_src_as_deref(load
->src
[0]);
1334 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1336 deref
= nir_deref_instr_parent(deref
);
1337 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1338 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1340 deref
= nir_deref_instr_parent(deref
);
1341 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1342 ASSERT_EQ(deref
->var
, var
);
1344 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1345 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1346 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1347 ASSERT_EQ(loads
[0x3]->swizzle
[0], 1);
1350 TEST_F(nir_load_store_vectorize_test
, shared_load_bool
)
1352 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1353 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1355 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1, 1);
1356 create_shared_load(nir_build_deref_array_imm(b
, deref
, 1), 0x2, 1);
1358 nir_validate_shader(b
->shader
, NULL
);
1359 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1361 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1363 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1365 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1366 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1367 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1369 deref
= nir_src_as_deref(load
->src
[0]);
1370 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1372 deref
= nir_deref_instr_parent(deref
);
1373 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1374 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1376 deref
= nir_deref_instr_parent(deref
);
1377 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1378 ASSERT_EQ(deref
->var
, var
);
1380 ASSERT_TRUE(test_alu(loads
[0x1]->src
.ssa
->parent_instr
, nir_op_i2b1
));
1381 ASSERT_TRUE(test_alu(loads
[0x2]->src
.ssa
->parent_instr
, nir_op_i2b1
));
1382 ASSERT_TRUE(test_alu_def(loads
[0x1]->src
.ssa
->parent_instr
, 0, &load
->dest
.ssa
, 0));
1383 ASSERT_TRUE(test_alu_def(loads
[0x2]->src
.ssa
->parent_instr
, 0, &load
->dest
.ssa
, 1));
1386 TEST_F(nir_load_store_vectorize_test
, shared_load_bool_mixed
)
1388 glsl_struct_field fields
[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1389 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1391 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_struct_type(fields
, 2, "Struct", false), "var");
1392 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1394 create_shared_load(nir_build_deref_struct(b
, deref
, 0), 0x1, 1);
1395 create_shared_load(nir_build_deref_array_imm(b
, nir_build_deref_struct(b
, deref
, 1), 0), 0x2);
1397 nir_validate_shader(b
->shader
, NULL
);
1398 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1400 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1402 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1404 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1405 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1406 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1408 deref
= nir_src_as_deref(load
->src
[0]);
1409 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1411 deref
= nir_deref_instr_parent(deref
);
1412 ASSERT_EQ(deref
->deref_type
, nir_deref_type_struct
);
1413 ASSERT_EQ(deref
->strct
.index
, 0);
1415 deref
= nir_deref_instr_parent(deref
);
1416 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1417 ASSERT_EQ(deref
->var
, var
);
1419 ASSERT_TRUE(test_alu(loads
[0x1]->src
.ssa
->parent_instr
, nir_op_i2b1
));
1420 ASSERT_TRUE(test_alu_def(loads
[0x1]->src
.ssa
->parent_instr
, 0, &load
->dest
.ssa
, 0));
1421 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1422 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1425 TEST_F(nir_load_store_vectorize_test
, shared_store_adjacent
)
1427 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1428 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1430 create_shared_store(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1431 create_shared_store(nir_build_deref_array_imm(b
, deref
, 1), 0x2);
1433 nir_validate_shader(b
->shader
, NULL
);
1434 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 2);
1436 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1438 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1440 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_deref
, 0);
1441 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x3);
1442 nir_ssa_def
*val
= store
->src
[1].ssa
;
1443 ASSERT_EQ(val
->bit_size
, 32);
1444 ASSERT_EQ(val
->num_components
, 2);
1445 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1446 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
1447 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
1449 deref
= nir_src_as_deref(store
->src
[0]);
1450 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1452 deref
= nir_deref_instr_parent(deref
);
1453 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1454 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1456 deref
= nir_deref_instr_parent(deref
);
1457 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1458 ASSERT_EQ(deref
->var
, var
);
1461 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_base
)
1463 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
1464 nir_intrinsic_set_base(create_load(nir_var_mem_push_const
, 0, 4, 0x2), 4);
1466 nir_validate_shader(b
->shader
, NULL
);
1467 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1469 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1471 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1474 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_direct_direct
)
1476 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
1477 create_load(nir_var_mem_push_const
, 0, 8, 0x2);
1479 nir_validate_shader(b
->shader
, NULL
);
1480 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1482 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1484 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1487 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_direct_indirect
)
1489 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1490 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
1491 create_indirect_load(nir_var_mem_push_const
, 0, index_base
, 0x2);
1493 nir_validate_shader(b
->shader
, NULL
);
1494 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1496 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1498 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1501 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_indirect_indirect
)
1503 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1504 create_indirect_load(nir_var_mem_push_const
, 0,
1505 nir_iadd(b
, nir_imul(b
, nir_iadd(b
, index_base
, nir_imm_int(b
, 2)), nir_imm_int(b
, 16)), nir_imm_int(b
, 32)), 0x1);
1506 create_indirect_load(nir_var_mem_push_const
, 0,
1507 nir_iadd(b
, nir_imul(b
, nir_iadd(b
, index_base
, nir_imm_int(b
, 3)), nir_imm_int(b
, 16)), nir_imm_int(b
, 32)), 0x2);
1509 nir_validate_shader(b
->shader
, NULL
);
1510 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1512 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1514 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1517 TEST_F(nir_load_store_vectorize_test
, push_const_load_adjacent_complex_indirect
)
1519 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1520 //vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
1521 nir_ssa_def
*low
= nir_iadd(b
, nir_imul(b
, index_base
, nir_imm_int(b
, 16)), nir_imm_int(b
, 12));
1522 nir_ssa_def
*high
= nir_imul(b
, nir_iadd(b
, index_base
, nir_imm_int(b
, 1)), nir_imm_int(b
, 16));
1523 create_indirect_load(nir_var_mem_push_const
, 0, low
, 0x1);
1524 create_indirect_load(nir_var_mem_push_const
, 0, high
, 0x2);
1526 nir_validate_shader(b
->shader
, NULL
);
1527 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1529 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const
));
1531 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 1);
1533 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_push_constant
, 0);
1534 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1535 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1536 ASSERT_EQ(load
->src
[0].ssa
, low
);
1537 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1538 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
1539 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1540 ASSERT_EQ(loads
[0x2]->swizzle
[0], 1);
1543 TEST_F(nir_load_store_vectorize_test
, ssbo_alias0
)
1545 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1546 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
1547 create_indirect_store(nir_var_mem_ssbo
, 0, index_base
, 0x2);
1548 create_load(nir_var_mem_ssbo
, 0, 0, 0x3);
1550 nir_validate_shader(b
->shader
, NULL
);
1551 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1553 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
1555 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1558 TEST_F(nir_load_store_vectorize_test
, ssbo_alias1
)
1560 nir_ssa_def
*load_base
= nir_load_global_invocation_index(b
, 32);
1561 nir_ssa_def
*store_base
= nir_load_local_invocation_index(b
);
1562 create_indirect_load(nir_var_mem_ssbo
, 0, load_base
, 0x1);
1563 create_indirect_store(nir_var_mem_ssbo
, 0, store_base
, 0x2);
1564 create_indirect_load(nir_var_mem_ssbo
, 0, load_base
, 0x3);
1566 nir_validate_shader(b
->shader
, NULL
);
1567 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1569 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
1571 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1574 TEST_F(nir_load_store_vectorize_test
, DISABLED_ssbo_alias2
)
1576 /* TODO: try to combine these loads */
1577 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1578 nir_ssa_def
*offset
= nir_iadd(b
, nir_imul(b
, index_base
, nir_imm_int(b
, 16)), nir_imm_int(b
, 4));
1579 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x1);
1580 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
1581 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x3);
1583 nir_validate_shader(b
->shader
, NULL
);
1584 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1586 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1588 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
1590 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
1591 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1592 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1593 ASSERT_EQ(load
->src
[1].ssa
, offset
);
1594 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1595 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1596 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1597 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
1600 TEST_F(nir_load_store_vectorize_test
, ssbo_alias3
)
1602 /* these loads can be combined if nir_alu_instr::no_unsigned_wrap is set.
1603 * these loads can't be combined because if index_base == 268435455, then
1604 * offset == 0 because the addition would wrap around */
1605 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1606 nir_ssa_def
*offset
= nir_iadd(b
, nir_imul(b
, index_base
, nir_imm_int(b
, 16)), nir_imm_int(b
, 16));
1607 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x1);
1608 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
1609 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x3);
1611 nir_validate_shader(b
->shader
, NULL
);
1612 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1614 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1616 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1619 TEST_F(nir_load_store_vectorize_test
, DISABLED_ssbo_alias4
)
1621 /* TODO: try to combine these loads */
1622 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1623 nir_ssa_def
*offset
= nir_iadd(b
, nir_imul(b
, index_base
, nir_imm_int(b
, 16)), nir_imm_int(b
, 16));
1624 nir_instr_as_alu(offset
->parent_instr
)->no_unsigned_wrap
= true;
1625 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x1);
1626 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
1627 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x3);
1629 nir_validate_shader(b
->shader
, NULL
);
1630 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1632 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1634 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
1636 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
1637 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1638 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1639 ASSERT_EQ(load
->src
[1].ssa
, offset
);
1640 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1641 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1642 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1643 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
1646 TEST_F(nir_load_store_vectorize_test
, ssbo_alias5
)
1648 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
1649 create_store(nir_var_mem_ssbo
, 1, 0, 0x2);
1650 create_load(nir_var_mem_ssbo
, 0, 0, 0x3);
1652 nir_validate_shader(b
->shader
, NULL
);
1653 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1655 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
1657 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1660 TEST_F(nir_load_store_vectorize_test
, ssbo_alias6
)
1662 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT
);
1663 create_store(nir_var_mem_ssbo
, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT
);
1664 create_load(nir_var_mem_ssbo
, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT
);
1666 nir_validate_shader(b
->shader
, NULL
);
1667 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1669 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1671 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
1673 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
1674 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1675 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1676 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
1677 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1678 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1679 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1680 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
1683 TEST_F(nir_load_store_vectorize_test
, DISABLED_shared_alias0
)
1685 /* TODO: implement type-based alias analysis so that these loads can be
1686 * combined. this is made a bit more difficult than simply using
1687 * nir_compare_derefs() because the vectorizer creates loads/stores with
1688 * casted derefs. The solution would probably be to keep multiple derefs for
1689 * an entry (one for each load/store combined into it). */
1690 glsl_struct_field fields
[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1691 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1693 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_struct_type(fields
, 2, "Struct", false), "var");
1694 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1696 nir_ssa_def
*index0
= nir_load_local_invocation_index(b
);
1697 nir_ssa_def
*index1
= nir_load_global_invocation_index(b
, 32);
1698 nir_deref_instr
*load_deref
= nir_build_deref_array(b
, nir_build_deref_struct(b
, deref
, 0), index0
);
1700 create_shared_load(load_deref
, 0x1);
1701 create_shared_store(nir_build_deref_array(b
, nir_build_deref_struct(b
, deref
, 1), index1
), 0x2);
1702 create_shared_load(load_deref
, 0x3);
1704 nir_validate_shader(b
->shader
, NULL
);
1705 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1707 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1709 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1711 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1712 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1713 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1714 ASSERT_EQ(load
->src
[0].ssa
, &load_deref
->dest
.ssa
);
1715 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1716 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1717 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1718 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
1721 TEST_F(nir_load_store_vectorize_test
, shared_alias1
)
1723 nir_variable
*var0
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_uint_type(), "var0");
1724 nir_variable
*var1
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_uint_type(), "var1");
1725 nir_deref_instr
*load_deref
= nir_build_deref_var(b
, var0
);
1727 create_shared_load(load_deref
, 0x1);
1728 create_shared_store(nir_build_deref_var(b
, var1
), 0x2);
1729 create_shared_load(load_deref
, 0x3);
1731 nir_validate_shader(b
->shader
, NULL
);
1732 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1734 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1736 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1738 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1739 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1740 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1741 ASSERT_EQ(load
->src
[0].ssa
, &load_deref
->dest
.ssa
);
1742 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1743 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1744 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1745 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
1748 TEST_F(nir_load_store_vectorize_test
, ssbo_load_distant_64bit
)
1750 create_indirect_load(nir_var_mem_ssbo
, 0, nir_imm_intN_t(b
, 0x100000000, 64), 0x1);
1751 create_indirect_load(nir_var_mem_ssbo
, 0, nir_imm_intN_t(b
, 0x200000004, 64), 0x2);
1753 nir_validate_shader(b
->shader
, NULL
);
1754 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1756 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1758 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1761 TEST_F(nir_load_store_vectorize_test
, ssbo_load_distant_indirect_64bit
)
1763 nir_ssa_def
*index_base
= nir_u2u64(b
, nir_load_local_invocation_index(b
));
1764 nir_ssa_def
*first
= nir_imul_imm(b
, index_base
, 0x100000000);
1765 nir_ssa_def
*second
= nir_imul_imm(b
, index_base
, 0x200000000);
1766 create_indirect_load(nir_var_mem_ssbo
, 0, first
, 0x1);
1767 create_indirect_load(nir_var_mem_ssbo
, 0, second
, 0x2);
1769 nir_validate_shader(b
->shader
, NULL
);
1770 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1772 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1774 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);