/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
24 #include <gtest/gtest.h>
27 #include "nir_builder.h"
/* Test fixture for nir_opt_load_store_vectorize. The helpers build NIR
 * load/store intrinsics in a compute shader and record each load's consuming
 * ALU source (keyed by a test-chosen id) so tests can verify how the
 * vectorizer rewrote the uses.
 * NOTE(review): access specifiers, the mem_ctx/b member declarations and the
 * closing brace are elided in this view — reconstructed; confirm against the
 * full file. */
class nir_load_store_vectorize_test
   : public ::testing::Test
{
protected:
   nir_load_store_vectorize_test();
   ~nir_load_store_vectorize_test();

   /* Number of intrinsic instructions with the given opcode in the shader. */
   unsigned count_intrinsics(nir_intrinsic_op intrinsic);

   /* Returns the index-th intrinsic with the given opcode, in block order. */
   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                      unsigned index);

   /* Runs nir_opt_load_store_vectorize (plus cleanup passes) on the shader;
    * returns whether the vectorizer reported progress. */
   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
                       nir_variable_mode robust_modes = (nir_variable_mode)0);

   /* Returns (and caches per binding) a vulkan_resource_index def. */
   nir_ssa_def *get_resource(uint32_t binding, bool ssbo);

   /* Builders for loads/stores at an SSA offset. `id` tags the access so the
    * test can look up its use (loads) or its stored constant pattern. */
   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
                                             unsigned access=0);
   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                              uint32_t id, unsigned bit_size=32, unsigned components=1,
                              unsigned wrmask=0xf, unsigned access=0);

   /* Convenience wrappers taking a constant byte offset. */
   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
                                    unsigned access=0);
   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
                     unsigned access=0);

   /* Deref-based shared-memory accesses. */
   void create_shared_load(nir_deref_instr *deref, uint32_t id,
                           unsigned bit_size=32, unsigned components=1);
   void create_shared_store(nir_deref_instr *deref, uint32_t id,
                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);

   /* Predicates used by tests to inspect the rewritten use chains. */
   bool test_alu(nir_instr *instr, nir_op op);
   bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);

   /* Callbacks handed to the passes under test. */
   static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
                                      unsigned num_components, unsigned high_offset,
                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high);
   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);

   void *mem_ctx;    /* ralloc parent owning the builder/shader */
   nir_builder *b;   /* builder used by all helpers */

   /* load id -> the ALU source consuming that load's result. */
   std::map<unsigned, nir_alu_src*> loads;
   /* binding -> cached vulkan_resource_index def. */
   std::map<unsigned, nir_ssa_def*> res_map;
};
/* Sets up a fresh compute-shader builder per test; glsl type singleton is
 * ref-counted so fixtures can be created/destroyed repeatedly. */
nir_load_store_vectorize_test::nir_load_store_vectorize_test()
{
   glsl_type_singleton_init_or_ref();

   mem_ctx = ralloc_context(NULL);
   static const nir_shader_compiler_options options = { };
   b = rzalloc(mem_ctx, nir_builder);
   nir_builder_init_simple_shader(b, mem_ctx, MESA_SHADER_COMPUTE, &options);
}
nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
{
   /* Dump the built shader to aid debugging.
    * NOTE(review): a guard (e.g. only print when the test failed) and the
    * ralloc_free(mem_ctx) teardown appear to be elided in this view —
    * confirm against the full file. */
   printf("\nShader from the failed test:\n\n");
   nir_print_shader(b->shader, stdout);

   glsl_type_singleton_decref();
}
/* Counts the intrinsic instructions with the given opcode in the shader.
 * NOTE(review): the accumulator and return statements are elided in this
 * view; reconstructed in the obvious way — confirm against the full file. */
unsigned
nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
{
   unsigned count = 0;
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic)
            count++;
      }
   }
   return count;
}
/* Returns the index-th intrinsic with the given opcode in block order, or
 * NULL if there are fewer matches.
 * NOTE(review): the index bookkeeping and final return are elided in this
 * view; reconstructed in the obvious way — confirm against the full file. */
nir_intrinsic_instr *
nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
                                             unsigned index)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic) {
            if (index == 0)
               return intrin;
            index--;
         }
      }
   }
   return NULL;
}
/* Runs the pass under test and common cleanup passes.
 * Shared-memory variables must be lowered to explicit offsets first, since
 * the vectorizer works on byte offsets.
 * NOTE(review): the `cse` parameter line and the exact guard structure
 * around the cleanup passes (e.g. `if (progress)` / `if (cse)`) are elided
 * in this view; reconstructed to match the declared default `cse=false` —
 * confirm against the full file. */
bool
nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
                                              bool cse,
                                              nir_variable_mode robust_modes)
{
   if (modes & nir_var_mem_shared)
      nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback, robust_modes);

   nir_validate_shader(b->shader, NULL);
   if (cse)
      nir_opt_cse(b->shader);
   nir_copy_prop(b->shader);
   nir_opt_algebraic(b->shader);
   nir_opt_constant_folding(b->shader);

   return progress;
}
157 nir_load_store_vectorize_test::get_resource(uint32_t binding
, bool ssbo
)
159 if (res_map
.count(binding
))
160 return res_map
[binding
];
162 nir_intrinsic_instr
*res
= nir_intrinsic_instr_create(
163 b
->shader
, nir_intrinsic_vulkan_resource_index
);
164 nir_ssa_dest_init(&res
->instr
, &res
->dest
, 1, 32, NULL
);
165 res
->num_components
= 1;
166 res
->src
[0] = nir_src_for_ssa(nir_imm_zero(b
, 1, 32));
167 nir_intrinsic_set_desc_type(
168 res
, ssbo
? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
169 nir_intrinsic_set_desc_set(res
, 0);
170 nir_intrinsic_set_binding(res
, binding
);
171 nir_builder_instr_insert(b
, &res
->instr
);
172 res_map
[binding
] = &res
->dest
.ssa
;
173 return &res
->dest
.ssa
;
/* Builds a load intrinsic of @mode at SSA @offset and records its consuming
 * ALU source in `loads[id]` (via an inserted mov) so the test can later
 * inspect the rewritten swizzle/def. Returns the load instruction.
 * NOTE(review): the `switch (mode)` header, `break`s, default case and final
 * `return load;` are elided in this view; reconstructed — confirm against
 * the full file. */
nir_intrinsic_instr *
nir_load_store_vectorize_test::create_indirect_load(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ubo:
      intrinsic = nir_intrinsic_load_ubo;
      res = get_resource(binding, false);
      break;
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_load_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_push_const:
      intrinsic = nir_intrinsic_load_push_constant;
      break;
   default:
      return NULL;
   }
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   if (res) {
      /* UBO/SSBO loads take (resource, offset); push constants just offset. */
      load->src[0] = nir_src_for_ssa(res);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }
   if (mode != nir_var_mem_push_const) {
      /* 1-bit booleans are stored as 32-bit, hence the special case. */
      nir_intrinsic_set_align(load, (bit_size == 1 ? 32 : bit_size) / 8, 0);
      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
   }
   nir_builder_instr_insert(b, &load->instr);
   /* The mov gives each load a stable use we can find after vectorization. */
   nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr;
   loads[id] = &nir_instr_as_alu(mov)->src[0];

   return load;
}
/* Builds a store intrinsic of @mode at SSA @offset. The stored value is an
 * immediate whose component i is (id << 4) | i, so tests can recognise which
 * original store each lane of a merged store came from.
 * NOTE(review): the `switch (mode)` header, `break`s and default case are
 * elided in this view; reconstructed — confirm against the full file. */
void
nir_load_store_vectorize_test::create_indirect_store(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_store_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_shared:
      intrinsic = nir_intrinsic_store_shared;
      break;
   default:
      return;
   }
   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   if (res) {
      /* SSBO stores take (value, resource, offset); shared just (value, offset). */
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(res);
      store->src[2] = nir_src_for_ssa(offset);
   } else {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
   }
   /* 1-bit booleans are stored as 32-bit, hence the special case. */
   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}
/* Convenience wrapper: load at a constant byte offset. */
nir_intrinsic_instr *
nir_load_store_vectorize_test::create_load(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
}
/* Convenience wrapper: store at a constant byte offset. */
void
nir_load_store_vectorize_test::create_store(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
}
/* Builds a load_deref of @deref and records its consuming ALU source in
 * `loads[id]` (via an inserted mov), mirroring create_indirect_load for the
 * deref-based shared-memory path. */
void nir_load_store_vectorize_test::create_shared_load(
   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
{
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   nir_builder_instr_insert(b, &load->instr);
   /* The mov gives each load a stable use we can find after vectorization. */
   nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr;
   loads[id] = &nir_instr_as_alu(mov)->src[0];
}
/* Builds a store_deref to @deref. Component i of the stored immediate is
 * (id << 4) | i so merged stores can be attributed to their originals. */
void nir_load_store_vectorize_test::create_shared_store(
   nir_deref_instr *deref, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_deref);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   store->src[1] = nir_src_for_ssa(value);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}
304 bool nir_load_store_vectorize_test::test_alu(nir_instr
*instr
, nir_op op
)
306 return instr
->type
== nir_instr_type_alu
&& nir_instr_as_alu(instr
)->op
== op
;
/* True iff @instr is an ALU instruction whose source @index reads @def with
 * first-component swizzle @swizzle.
 * NOTE(review): the per-check `return false` and final `return true`
 * statements are elided in this view; reconstructed in the obvious way —
 * confirm against the full file. */
bool nir_load_store_vectorize_test::test_alu_def(
   nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (index >= nir_op_infos[alu->op].num_inputs)
      return false;
   if (alu->src[index].src.ssa != def)
      return false;
   if (alu->src[index].swizzle[0] != swizzle)
      return false;

   return true;
}
/* Driver callback for nir_opt_load_store_vectorize: decides whether a merged
 * access is acceptable.
 * NOTE(review): the body is entirely elided in this view; a test fixture
 * typically accepts every candidate (`return true;`) — confirm against the
 * full file. */
bool nir_load_store_vectorize_test::mem_vectorize_callback(
   unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high)
{
   return true;
}
334 void nir_load_store_vectorize_test::shared_type_info(
335 const struct glsl_type
*type
, unsigned *size
, unsigned *align
)
337 assert(glsl_type_is_vector_or_scalar(type
));
339 uint32_t comp_size
= glsl_type_is_boolean(type
)
340 ? 4 : glsl_get_bit_size(type
) / 8;
341 unsigned length
= glsl_get_vector_elements(type
);
342 *size
= comp_size
* length
,
/* Two scalar UBO loads at offsets 0 and 4 merge into one vec2 load; the two
 * original uses read components 0 and 1. */
TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* Two overlapping vec2 UBO loads (offsets 0 and 4) merge into one vec3. */
TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
   ASSERT_EQ(loads[0x2]->swizzle[1], 2);
}
/* Two identical scalar UBO loads collapse into a single scalar load; both
 * uses read component 0. */
TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 0);
}
/* A vec2 + adjacent vec3 would need 5 components — more than a vec4 — so
 * the two loads must remain separate. */
TEST_F(nir_load_store_vectorize_test, ubo_load_large)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
}
/* Adjacent push-constant loads merge; note the offset is src[0] here since
 * push-constant loads have no resource source. */
TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* Adjacency expressed via the `base` index constant (instead of the offset
 * source) is also recognised and merged. */
TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* Adjacent scalar SSBO loads merge into one vec2 load. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* Loads at `base` and `base + 4` (base non-constant) merge, keeping `base`
 * as the merged load's offset. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* Loads at `base - 4` (expressed as iadd_imm with 0xfffffffc) and `base`
 * merge, anchored at the lower offset. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base_prev);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* Offsets (inv+1)*-4 and inv*-4 are 4 bytes apart, so the loads merge; the
 * merged offset is then checked against the algebraically-optimized form
 * (imul by -4 becomes ineg(ishl by 2)). */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
{
   nir_ssa_def *inv = nir_load_local_invocation_index(b);
   nir_ssa_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
   nir_ssa_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
   nir_ssa_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);

   /* nir_opt_algebraic optimizes the imul */
   ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
   nir_ssa_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
   nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
   ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
   ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
}
/* A store to a non-overlapping offset between two identical loads does not
 * prevent the loads from being combined. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
}
/* A store that overlaps the loaded range between two identical vec2 loads
 * blocks combining them. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}
/* A store to the same address between two identical loads blocks combining
 * them (the second load observes the stored value). */
TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}
/* A load of the same address between two identical stores blocks combining
 * the stores (the load must observe the first store's value). */
TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0, 0x2);
   create_store(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}
640 /* if nir_opt_load_store_vectorize were implemented like many load/store
641 * optimization passes are (for example, nir_opt_combine_stores and
642 * nir_opt_copy_prop_vars) and stopped tracking a load when an aliasing store is
643 * encountered, this case wouldn't be optimized.
644 * A similar test for derefs is shared_load_adjacent_store_identical. */
/* The intervening store only aliases the first load's address, not the
 * second's, so the two loads can still be merged into a vec2. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 4, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 1);
}
/* Adjacent scalar stores merge into one vec2 store; the merged immediate
 * carries both original values (0x10 from id 0x1, 0x20 from id 0x2). */
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
}
/* Overlapping vec2 stores merge into a vec3 store; the later store wins for
 * the overlapping component (cv[1] is 0x20, not 0x11). */
TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
}
/* Two stores to the same address collapse into one; the surviving value is
 * the later store's (0x20). */
TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
}
/* A vec2 + adjacent vec3 store would need 5 components, so the two stores
 * remain separate. */
TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}
/* UBOs are read-only, so a memory barrier between the loads does not block
 * vectorization.
 * NOTE(review): the barrier's variable-mode argument is elided in this
 * view; reconstructed as nir_var_mem_ssbo — confirm against the full file. */
TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
}
/* A device-scope SSBO memory barrier between the loads must prevent them
 * from being combined.
 * NOTE(review): the barrier's variable-mode argument is elided in this
 * view; reconstructed as nir_var_mem_ssbo — confirm against the full file. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}
/* nir_intrinsic_control_barrier only syncs invocations in a workgroup, it
 * doesn't require that loads/stores complete.
 */
/* A pure control barrier (no memory semantics) does not block combining the
 * loads. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_control_barrier)->instr);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}
/* A workgroup-scope barrier covering only shared memory does not affect
 * SSBO loads, so they still combine.
 * NOTE(review): the barrier's variable-mode argument is elided in this
 * view; reconstructed as nir_var_mem_shared — confirm against the full
 * file. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_shared);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}
/* Two 8-bit loads and one 16-bit load over bytes 0..3 merge into a single
 * 8-bit vec4; the 16-bit use is rebuilt from components 2 and 3 via
 * u2u16 + ishl + ior. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 8);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);

   nir_ssa_def *val = loads[0x3]->src.ssa;
   ASSERT_EQ(val->bit_size, 16);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
   nir_ssa_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
   nir_ssa_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
   high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->dest.ssa, 2));
   ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->dest.ssa, 3));
}
/* A 32-bit vec2 load and an adjacent 64-bit load merge into a 32-bit vec4;
 * the 64-bit use is rebuilt with pack_64_2x32 from components 2 and 3. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa);
   ASSERT_EQ(pack->src[0].swizzle[0], 2);
   ASSERT_EQ(pack->src[0].swizzle[1], 3);
}
/* Three adjacent loads (32-bit vec2 @0, 64-bit @8, 64-bit @16) must collapse
 * into a single 64-bit vec3 load (run with CSE enabled). The 64-bit users get
 * a mov of component 1 and the 32-bit user an unpack_64_2x32 of component 0. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   /* One combined load: 64-bit, 3 components, at offset 0. */
   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 64);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   /* Load 0x3 (the 64-bit load at offset 16) reads component 2 directly. */
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->swizzle[0], 2);

   /* Load 0x2 (64-bit at offset 8) goes through a mov of component 1. */
   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_mov));
   nir_alu_instr *mov = nir_instr_as_alu(val->parent_instr);
   ASSERT_EQ(mov->src[0].src.ssa, &load->dest.ssa);
   ASSERT_EQ(mov->src[0].swizzle[0], 1);

   /* Load 0x1 (32-bit vec2 at offset 0) is an unpack of component 0. */
   val = loads[0x1]->src.ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
   nir_alu_instr *unpack = nir_instr_as_alu(val->parent_instr);
   ASSERT_EQ(unpack->src[0].src.ssa, &load->dest.ssa);
   ASSERT_EQ(unpack->src[0].swizzle[0], 0);
}
/* Overlapping loads: a 32-bit vec2 at offset 4 and a 64-bit scalar at offset 8
 * share 4 bytes. They must merge into one 32-bit vec3 load at offset 4, with
 * the 64-bit user rebuilt by packing components 1 and 2. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   /* One combined load: 32-bit, 3 components, at offset 4. */
   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);

   /* The 64-bit read packs components 1 and 2 (bytes 8..15). */
   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa);
   ASSERT_EQ(pack->src[0].swizzle[0], 1);
   ASSERT_EQ(pack->src[0].swizzle[1], 2);
}
/* Adjacent stores of 8, 8 and 16 bits (offsets 0, 1, 2) must be merged into a
 * single 8-bit vec4 store of a constant, with the 16-bit value split into two
 * bytes (0x30 then 0x0 — little-endian low byte first). */
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   /* One combined store at offset 0 writing all four bytes. */
   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 8);
   ASSERT_EQ(val->num_components, 4);
   /* Stored value is a load_const with the per-id test constants. */
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
}
/* A 32-bit vec2 store at offset 0 and a 64-bit store at offset 8 must merge
 * into one 32-bit vec4 store, the 64-bit constant being split into its low
 * (0x20) and high (0x0) 32-bit halves. */
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
}
/* Stores of 32-bit vec2 @0, 64-bit @8 and 64-bit @16 must merge into one
 * 64-bit vec3 store; the two 32-bit values are fused into one 64-bit constant
 * (0x1100000010). */
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
}
/* Overlapping stores: a 32-bit vec2 at offset 0 and a 64-bit at offset 4.
 * The later 64-bit store wins for the overlapped bytes, yielding one 32-bit
 * vec3 store of {0x10, 0x20, 0x0}. */
TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
}
/* A 32-bit store at offset 0 followed by a 64-bit vec2 store at offset 4:
 * the vectorizer must make no progress and both stores must remain. */
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}
/* Two stores to the same offset with different write masks (x|z then y|z|w).
 * They must merge into one store with mask 0xf; for the overlapping z
 * component the second store's value (0x22) wins. */
TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
}
/* Deref-based shared-memory path: loads of var[0] and var[1] must combine into
 * one vec2 load_deref through a cast of the array deref at index 0. */
TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   /* Deref chain of the combined load: cast -> array[0] -> var. */
   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   /* Both original loads read from the combined result, components 0/1. */
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* 64-bit array indices whose low 32 bits differ by 1 but whose full values are
 * far apart (0x100000000 vs 0x200000001) must NOT be treated as adjacent. */
TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   /* Force the deref to 64-bit so 64-bit indices are representable. */
   nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 64, NULL);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
}
/* Indirect adjacency: loads of var[i] and var[i+1] (i = local invocation
 * index) must combine into one vec2 load whose array deref uses i. */
TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);

   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   /* Deref chain: cast -> array[index_base] -> var. */
   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(deref->arr.index.ssa, index_base);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* Like shared_load_adjacent_indirect, but the lower index is i + 0xffffffff
 * (i.e. i - 1 with 32-bit wraparound). The combined load must be based at the
 * smaller index (index_base_prev). */
TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);

   create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   /* Deref chain: cast -> array[index_base_prev] -> var. */
   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(deref->arr.index.ssa, index_base_prev);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* Loads of struct.field0 and struct.field1[0], which are adjacent in memory,
 * must combine into one vec2 load through a cast of the field0 struct deref. */
TEST_F(nir_load_store_vectorize_test, shared_load_struct)
{
   glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};

   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   /* Deref chain: cast -> struct member 0 -> var. */
   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
   ASSERT_EQ(deref->strct.index, 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* Two identical loads of var[0] separated by a store to var[1]: the store
 * does not alias, so the loads are CSE'd into one scalar load while the store
 * is left in place. */
TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   /* Single scalar load of var[0]; no cast is needed here. */
   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   /* Both original loads now read component 0 of the same load. */
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
}
/* Two identical loads of var[0] with a store to the SAME location in between:
 * the store aliases, so the loads must not be merged. */
TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
}
/* Loads of var[0] and var[1] with a store to var[0] in between: the store
 * aliases only the first load, which the pass can still handle by moving the
 * combined vec2 load appropriately — expect one load and one store. */
TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   /* Deref chain: cast -> array[0] -> var. */
   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 1);
}
/* Adjacent loads of a bool array: they combine into one 32-bit vec2 load and
 * each original boolean use is rebuilt with an i2b1 conversion. */
TEST_F(nir_load_store_vectorize_test, shared_load_bool)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   /* Deref chain: cast -> array[0] -> var. */
   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   /* Each boolean use is an i2b1 of the corresponding component. */
   ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
   ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_i2b1));
   ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
   ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->dest.ssa, 1));
}
/* Mixed bool/uint loads from a struct: the combined vec2 load feeds an i2b1
 * for the boolean member and a direct swizzled read for the uint member. */
TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
{
   glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};

   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
   create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   /* Deref chain: cast -> struct member 0 -> var. */
   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
   ASSERT_EQ(deref->strct.index, 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   /* Bool member goes through i2b1 of component 0; uint member reads
    * component 1 directly. */
   ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
   ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* Adjacent shared-memory stores to var[0] and var[1] must merge into a single
 * vec2 store_deref (value in src[1], write mask 0x3) through a cast deref. */
TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_ssa_def *val = store->src[1].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);

   /* Deref chain: cast -> array[0] -> var. */
   deref = nir_src_as_deref(store->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);
}
/* Push-constant loads with different intrinsic base values must not be
 * combined even though their offsets look adjacent. */
TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}
/* Two direct push-constant loads with a 4-byte gap (offsets 0 and 8) are not
 * adjacent, so the pass must leave both alone. */
TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 8, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}
/* A direct load at offset 0 and an indirect load at an unknown offset cannot
 * be proven adjacent, so no combining happens. */
TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}
/* Two indirect loads at (i+2)*16+32 and (i+3)*16+32 — 16 bytes apart, not
 * adjacent for a 4-byte access — must not be combined. */
TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_push_const, 0,
                        nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 2)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x1);
   create_indirect_load(nir_var_mem_push_const, 0,
                        nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 3)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}
/* Indirect offsets i*16+12 and (i+1)*16 are provably 4 bytes apart, so the
 * two loads must combine into one vec2 load based at the lower offset. */
TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   //vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
   nir_ssa_def *low = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 12));
   nir_ssa_def *high = nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 1)), nir_imm_int(b, 16));
   create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
   create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   /* The combined load uses the lower ("low") offset expression. */
   ASSERT_EQ(load->src[0].ssa, low);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
/* An indirect store between two identical loads may alias them (its offset is
 * unknown), so the loads must not be CSE'd across it. */
TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}
/* Indirect loads and an indirect store with unrelated index sources: the pass
 * cannot prove they don't alias, so the loads stay separate. */
TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
{
   nir_ssa_def *load_base = nir_load_global_invocation_index(b, 32);
   nir_ssa_def *store_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
   create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}
/* Disabled: loads at i*16+4 never overlap a store at offset 0, so ideally the
 * two identical loads would be CSE'd into one. */
TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
{
   /* TODO: try to combine these loads */
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 4));
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[1].ssa, offset);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
}
/* Without no_unsigned_wrap the offset expression may wrap to 0 and alias the
 * store, so the two identical loads must remain (the run still reports
 * progress for other reasons, but the load count stays 2). */
TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
{
   /* these loads can be combined if nir_alu_instr::no_unsigned_wrap is set.
    * these loads can't be combined because if index_base == 268435455, then
    * offset == 0 because the addition would wrap around */
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}
/* Disabled: same as alias3, but with no_unsigned_wrap set on the offset add,
 * the wraparound case is excluded and the loads could be combined. */
TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
{
   /* TODO: try to combine these loads */
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
   nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[1].ssa, offset);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
}
/* A store to a DIFFERENT SSBO binding (1) between two identical loads from
 * binding 0: without restrict info the bindings may alias, so no combining. */
TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 1, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}
1659 TEST_F(nir_load_store_vectorize_test
, ssbo_alias6
)
1661 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT
);
1662 create_store(nir_var_mem_ssbo
, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT
);
1663 create_load(nir_var_mem_ssbo
, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT
);
1665 nir_validate_shader(b
->shader
, NULL
);
1666 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1668 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1670 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
1672 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
1673 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1674 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1675 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
1676 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1677 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1678 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1679 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
1682 TEST_F(nir_load_store_vectorize_test
, DISABLED_shared_alias0
)
1684 /* TODO: implement type-based alias analysis so that these loads can be
1685 * combined. this is made a bit more difficult than simply using
1686 * nir_compare_derefs() because the vectorizer creates loads/stores with
1687 * casted derefs. The solution would probably be to keep multiple derefs for
1688 * an entry (one for each load/store combined into it). */
1689 glsl_struct_field fields
[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1690 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1692 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_struct_type(fields
, 2, "Struct", false), "var");
1693 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1695 nir_ssa_def
*index0
= nir_load_local_invocation_index(b
);
1696 nir_ssa_def
*index1
= nir_load_global_invocation_index(b
, 32);
1697 nir_deref_instr
*load_deref
= nir_build_deref_array(b
, nir_build_deref_struct(b
, deref
, 0), index0
);
1699 create_shared_load(load_deref
, 0x1);
1700 create_shared_store(nir_build_deref_array(b
, nir_build_deref_struct(b
, deref
, 1), index1
), 0x2);
1701 create_shared_load(load_deref
, 0x3);
1703 nir_validate_shader(b
->shader
, NULL
);
1704 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1706 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1708 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1710 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1711 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1712 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1713 ASSERT_EQ(load
->src
[0].ssa
, &load_deref
->dest
.ssa
);
1714 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1715 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1716 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1717 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
1720 TEST_F(nir_load_store_vectorize_test
, shared_alias1
)
1722 nir_variable
*var0
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_uint_type(), "var0");
1723 nir_variable
*var1
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_uint_type(), "var1");
1724 nir_deref_instr
*load_deref
= nir_build_deref_var(b
, var0
);
1726 create_shared_load(load_deref
, 0x1);
1727 create_shared_store(nir_build_deref_var(b
, var1
), 0x2);
1728 create_shared_load(load_deref
, 0x3);
1730 nir_validate_shader(b
->shader
, NULL
);
1731 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1733 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1735 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1737 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1738 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1739 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1740 ASSERT_EQ(load
->src
[0].ssa
, &load_deref
->dest
.ssa
);
1741 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
1742 ASSERT_EQ(loads
[0x3]->src
.ssa
, &load
->dest
.ssa
);
1743 ASSERT_EQ(loads
[0x1]->swizzle
[0], 0);
1744 ASSERT_EQ(loads
[0x3]->swizzle
[0], 0);
TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
{
   /* Two loads with 64-bit constant offsets 0x100000000 and 0x200000004.
    * The offsets are too far apart to form a single vectorized load, so
    * the pass must leave both loads in place (the 32-bit-truncated
    * offsets would wrongly look adjacent: 0 and 4). */
   create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x100000000, 64), 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x200000004, 64), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   /* no merging: both loads must survive */
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}
1760 TEST_F(nir_load_store_vectorize_test
, ssbo_load_distant_indirect_64bit
)
1762 nir_ssa_def
*index_base
= nir_u2u64(b
, nir_load_local_invocation_index(b
));
1763 nir_ssa_def
*first
= nir_imul_imm(b
, index_base
, 0x100000000);
1764 nir_ssa_def
*second
= nir_imul_imm(b
, index_base
, 0x200000000);
1765 create_indirect_load(nir_var_mem_ssbo
, 0, first
, 0x1);
1766 create_indirect_load(nir_var_mem_ssbo
, 0, second
, 0x2);
1768 nir_validate_shader(b
->shader
, NULL
);
1769 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1771 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1773 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
{
   /* Loads at offsets 0xfffffffc and 0x0: merging them would require an
    * offset computation that wraps around 32 bits. With robustness
    * enabled for SSBOs (robust_modes = nir_var_mem_ssbo), the pass must
    * refuse to combine them, so it reports no progress. */
   create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   /* run with cse=false and SSBOs marked robust */
   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   /* both loads must remain */
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}