2 * Copyright © 2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 #include <gtest/gtest.h>
27 #include "nir_builder.h"
/* This is a macro so you get good line numbers: it checks that the mov
 * instruction `instr` reads from the vectorized `load`'s destination with
 * exactly the component swizzle `expected_swizzle` (e.g. "xy").
 */
#define EXPECT_INSTR_SWIZZLES(instr, load, expected_swizzle)  \
   EXPECT_EQ((instr)->src[0].src.ssa, &(load)->dest.ssa);     \
   EXPECT_EQ(swizzle(instr, 0), expected_swizzle);
36 class nir_load_store_vectorize_test
: public ::testing::Test
{
38 nir_load_store_vectorize_test();
39 ~nir_load_store_vectorize_test();
41 unsigned count_intrinsics(nir_intrinsic_op intrinsic
);
43 nir_intrinsic_instr
*get_intrinsic(nir_intrinsic_op intrinsic
,
46 bool run_vectorizer(nir_variable_mode modes
, bool cse
=false,
47 nir_variable_mode robust_modes
= (nir_variable_mode
)0);
49 nir_ssa_def
*get_resource(uint32_t binding
, bool ssbo
);
51 nir_intrinsic_instr
*create_indirect_load(nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
,
52 uint32_t id
, unsigned bit_size
=32, unsigned components
=1,
54 void create_indirect_store(nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
,
55 uint32_t id
, unsigned bit_size
=32, unsigned components
=1,
56 unsigned wrmask
=0xf, unsigned access
=0);
58 nir_intrinsic_instr
*create_load(nir_variable_mode mode
, uint32_t binding
, uint32_t offset
,
59 uint32_t id
, unsigned bit_size
=32, unsigned components
=1,
61 void create_store(nir_variable_mode mode
, uint32_t binding
, uint32_t offset
,
62 uint32_t id
, unsigned bit_size
=32, unsigned components
=1, unsigned wrmask
=0xf,
65 void create_shared_load(nir_deref_instr
*deref
, uint32_t id
,
66 unsigned bit_size
=32, unsigned components
=1);
67 void create_shared_store(nir_deref_instr
*deref
, uint32_t id
,
68 unsigned bit_size
=32, unsigned components
=1, unsigned wrmask
=0xf);
70 bool test_alu(nir_instr
*instr
, nir_op op
);
71 bool test_alu_def(nir_instr
*instr
, unsigned index
, nir_ssa_def
*def
, unsigned swizzle
=0);
73 static bool mem_vectorize_callback(unsigned align
, unsigned bit_size
,
74 unsigned num_components
, unsigned high_offset
,
75 nir_intrinsic_instr
*low
, nir_intrinsic_instr
*high
);
76 static void shared_type_info(const struct glsl_type
*type
, unsigned *size
, unsigned *align
);
78 std::string
swizzle(nir_alu_instr
*instr
, int src
);
83 std::map
<unsigned, nir_alu_instr
*> movs
;
84 std::map
<unsigned, nir_alu_src
*> loads
;
85 std::map
<unsigned, nir_ssa_def
*> res_map
;
88 nir_load_store_vectorize_test::nir_load_store_vectorize_test()
90 glsl_type_singleton_init_or_ref();
92 mem_ctx
= ralloc_context(NULL
);
93 static const nir_shader_compiler_options options
= { };
94 b
= rzalloc(mem_ctx
, nir_builder
);
95 nir_builder_init_simple_shader(b
, mem_ctx
, MESA_SHADER_COMPUTE
, &options
);
98 nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
101 printf("\nShader from the failed test:\n\n");
102 nir_print_shader(b
->shader
, stdout
);
105 ralloc_free(mem_ctx
);
107 glsl_type_singleton_decref();
111 nir_load_store_vectorize_test::swizzle(nir_alu_instr
*instr
, int src
)
114 for (unsigned i
= 0; i
< nir_ssa_alu_instr_src_components(instr
, src
); i
++) {
115 swizzle
+= "xyzw"[instr
->src
[src
].swizzle
[i
]];
122 nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic
)
125 nir_foreach_block(block
, b
->impl
) {
126 nir_foreach_instr(instr
, block
) {
127 if (instr
->type
!= nir_instr_type_intrinsic
)
129 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
130 if (intrin
->intrinsic
== intrinsic
)
137 nir_intrinsic_instr
*
138 nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic
,
141 nir_foreach_block(block
, b
->impl
) {
142 nir_foreach_instr(instr
, block
) {
143 if (instr
->type
!= nir_instr_type_intrinsic
)
145 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
146 if (intrin
->intrinsic
== intrinsic
) {
157 nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes
,
159 nir_variable_mode robust_modes
)
161 if (modes
& nir_var_mem_shared
)
162 nir_lower_vars_to_explicit_types(b
->shader
, nir_var_mem_shared
, shared_type_info
);
163 bool progress
= nir_opt_load_store_vectorize(b
->shader
, modes
, mem_vectorize_callback
, robust_modes
);
165 nir_validate_shader(b
->shader
, NULL
);
167 nir_opt_cse(b
->shader
);
168 nir_copy_prop(b
->shader
);
169 nir_opt_algebraic(b
->shader
);
170 nir_opt_constant_folding(b
->shader
);
176 nir_load_store_vectorize_test::get_resource(uint32_t binding
, bool ssbo
)
178 if (res_map
.count(binding
))
179 return res_map
[binding
];
181 nir_intrinsic_instr
*res
= nir_intrinsic_instr_create(
182 b
->shader
, nir_intrinsic_vulkan_resource_index
);
183 nir_ssa_dest_init(&res
->instr
, &res
->dest
, 1, 32, NULL
);
184 res
->num_components
= 1;
185 res
->src
[0] = nir_src_for_ssa(nir_imm_zero(b
, 1, 32));
186 nir_intrinsic_set_desc_type(
187 res
, ssbo
? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
188 nir_intrinsic_set_desc_set(res
, 0);
189 nir_intrinsic_set_binding(res
, binding
);
190 nir_builder_instr_insert(b
, &res
->instr
);
191 res_map
[binding
] = &res
->dest
.ssa
;
192 return &res
->dest
.ssa
;
195 nir_intrinsic_instr
*
196 nir_load_store_vectorize_test::create_indirect_load(
197 nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
, uint32_t id
,
198 unsigned bit_size
, unsigned components
, unsigned access
)
200 nir_intrinsic_op intrinsic
;
201 nir_ssa_def
*res
= NULL
;
203 case nir_var_mem_ubo
:
204 intrinsic
= nir_intrinsic_load_ubo
;
205 res
= get_resource(binding
, false);
207 case nir_var_mem_ssbo
:
208 intrinsic
= nir_intrinsic_load_ssbo
;
209 res
= get_resource(binding
, true);
211 case nir_var_mem_push_const
:
212 intrinsic
= nir_intrinsic_load_push_constant
;
217 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
->shader
, intrinsic
);
218 nir_ssa_dest_init(&load
->instr
, &load
->dest
, components
, bit_size
, NULL
);
219 load
->num_components
= components
;
221 load
->src
[0] = nir_src_for_ssa(res
);
222 load
->src
[1] = nir_src_for_ssa(offset
);
224 load
->src
[0] = nir_src_for_ssa(offset
);
226 if (mode
!= nir_var_mem_push_const
) {
227 nir_intrinsic_set_align(load
, (bit_size
== 1 ? 32 : bit_size
) / 8, 0);
228 nir_intrinsic_set_access(load
, (gl_access_qualifier
)access
);
230 nir_builder_instr_insert(b
, &load
->instr
);
231 nir_alu_instr
*mov
= nir_instr_as_alu(nir_mov(b
, &load
->dest
.ssa
)->parent_instr
);
233 loads
[id
] = &mov
->src
[0];
239 nir_load_store_vectorize_test::create_indirect_store(
240 nir_variable_mode mode
, uint32_t binding
, nir_ssa_def
*offset
, uint32_t id
,
241 unsigned bit_size
, unsigned components
, unsigned wrmask
, unsigned access
)
243 nir_const_value values
[NIR_MAX_VEC_COMPONENTS
];
244 for (unsigned i
= 0; i
< components
; i
++)
245 values
[i
] = nir_const_value_for_raw_uint((id
<< 4) | i
, bit_size
);
246 nir_ssa_def
*value
= nir_build_imm(b
, components
, bit_size
, values
);
248 nir_intrinsic_op intrinsic
;
249 nir_ssa_def
*res
= NULL
;
251 case nir_var_mem_ssbo
:
252 intrinsic
= nir_intrinsic_store_ssbo
;
253 res
= get_resource(binding
, true);
255 case nir_var_mem_shared
:
256 intrinsic
= nir_intrinsic_store_shared
;
261 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
->shader
, intrinsic
);
262 nir_ssa_dest_init(&store
->instr
, &store
->dest
, components
, bit_size
, NULL
);
263 store
->num_components
= components
;
265 store
->src
[0] = nir_src_for_ssa(value
);
266 store
->src
[1] = nir_src_for_ssa(res
);
267 store
->src
[2] = nir_src_for_ssa(offset
);
269 store
->src
[0] = nir_src_for_ssa(value
);
270 store
->src
[1] = nir_src_for_ssa(offset
);
272 nir_intrinsic_set_align(store
, (bit_size
== 1 ? 32 : bit_size
) / 8, 0);
273 nir_intrinsic_set_access(store
, (gl_access_qualifier
)access
);
274 nir_intrinsic_set_write_mask(store
, wrmask
& ((1 << components
) - 1));
275 nir_builder_instr_insert(b
, &store
->instr
);
278 nir_intrinsic_instr
*
279 nir_load_store_vectorize_test::create_load(
280 nir_variable_mode mode
, uint32_t binding
, uint32_t offset
, uint32_t id
,
281 unsigned bit_size
, unsigned components
, unsigned access
)
283 return create_indirect_load(mode
, binding
, nir_imm_int(b
, offset
), id
, bit_size
, components
, access
);
287 nir_load_store_vectorize_test::create_store(
288 nir_variable_mode mode
, uint32_t binding
, uint32_t offset
, uint32_t id
,
289 unsigned bit_size
, unsigned components
, unsigned wrmask
, unsigned access
)
291 create_indirect_store(mode
, binding
, nir_imm_int(b
, offset
), id
, bit_size
, components
, wrmask
, access
);
294 void nir_load_store_vectorize_test::create_shared_load(
295 nir_deref_instr
*deref
, uint32_t id
, unsigned bit_size
, unsigned components
)
297 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_deref
);
298 nir_ssa_dest_init(&load
->instr
, &load
->dest
, components
, bit_size
, NULL
);
299 load
->num_components
= components
;
300 load
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
301 nir_builder_instr_insert(b
, &load
->instr
);
302 nir_alu_instr
*mov
= nir_instr_as_alu(nir_mov(b
, &load
->dest
.ssa
)->parent_instr
);
304 loads
[id
] = &mov
->src
[0];
307 void nir_load_store_vectorize_test::create_shared_store(
308 nir_deref_instr
*deref
, uint32_t id
,
309 unsigned bit_size
, unsigned components
, unsigned wrmask
)
311 nir_const_value values
[NIR_MAX_VEC_COMPONENTS
];
312 for (unsigned i
= 0; i
< components
; i
++)
313 values
[i
] = nir_const_value_for_raw_uint((id
<< 4) | i
, bit_size
);
314 nir_ssa_def
*value
= nir_build_imm(b
, components
, bit_size
, values
);
316 nir_intrinsic_instr
*store
= nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_deref
);
317 nir_ssa_dest_init(&store
->instr
, &store
->dest
, components
, bit_size
, NULL
);
318 store
->num_components
= components
;
319 store
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
320 store
->src
[1] = nir_src_for_ssa(value
);
321 nir_intrinsic_set_write_mask(store
, wrmask
& ((1 << components
) - 1));
322 nir_builder_instr_insert(b
, &store
->instr
);
325 bool nir_load_store_vectorize_test::test_alu(nir_instr
*instr
, nir_op op
)
327 return instr
->type
== nir_instr_type_alu
&& nir_instr_as_alu(instr
)->op
== op
;
330 bool nir_load_store_vectorize_test::test_alu_def(
331 nir_instr
*instr
, unsigned index
, nir_ssa_def
*def
, unsigned swizzle
)
333 if (instr
->type
!= nir_instr_type_alu
)
336 nir_alu_instr
*alu
= nir_instr_as_alu(instr
);
338 if (index
>= nir_op_infos
[alu
->op
].num_inputs
)
340 if (alu
->src
[index
].src
.ssa
!= def
)
342 if (alu
->src
[index
].swizzle
[0] != swizzle
)
348 bool nir_load_store_vectorize_test::mem_vectorize_callback(
349 unsigned align
, unsigned bit_size
, unsigned num_components
, unsigned high_offset
,
350 nir_intrinsic_instr
*low
, nir_intrinsic_instr
*high
)
355 void nir_load_store_vectorize_test::shared_type_info(
356 const struct glsl_type
*type
, unsigned *size
, unsigned *align
)
358 assert(glsl_type_is_vector_or_scalar(type
));
360 uint32_t comp_size
= glsl_type_is_boolean(type
)
361 ? 4 : glsl_get_bit_size(type
) / 8;
362 unsigned length
= glsl_get_vector_elements(type
);
363 *size
= comp_size
* length
,
368 TEST_F(nir_load_store_vectorize_test
, ubo_load_adjacent
)
370 create_load(nir_var_mem_ubo
, 0, 0, 0x1);
371 create_load(nir_var_mem_ubo
, 0, 4, 0x2);
373 nir_validate_shader(b
->shader
, NULL
);
374 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
376 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
378 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
380 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ubo
, 0);
381 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
382 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
383 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
384 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
385 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
388 TEST_F(nir_load_store_vectorize_test
, ubo_load_intersecting
)
390 create_load(nir_var_mem_ubo
, 0, 0, 0x1, 32, 2);
391 create_load(nir_var_mem_ubo
, 0, 4, 0x2, 32, 2);
393 nir_validate_shader(b
->shader
, NULL
);
394 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
396 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
398 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
400 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ubo
, 0);
401 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
402 ASSERT_EQ(load
->dest
.ssa
.num_components
, 3);
403 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
404 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "xy");
405 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "yz");
408 TEST_F(nir_load_store_vectorize_test
, ubo_load_identical
)
410 create_load(nir_var_mem_ubo
, 0, 0, 0x1);
411 create_load(nir_var_mem_ubo
, 0, 0, 0x2);
413 nir_validate_shader(b
->shader
, NULL
);
414 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
416 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
418 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
420 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ubo
, 0);
421 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
422 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
423 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
424 ASSERT_EQ(loads
[0x1]->src
.ssa
, &load
->dest
.ssa
);
425 ASSERT_EQ(loads
[0x2]->src
.ssa
, &load
->dest
.ssa
);
426 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
427 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "x");
430 TEST_F(nir_load_store_vectorize_test
, ubo_load_large
)
432 create_load(nir_var_mem_ubo
, 0, 0, 0x1, 32, 2);
433 create_load(nir_var_mem_ubo
, 0, 8, 0x2, 32, 3);
435 nir_validate_shader(b
->shader
, NULL
);
436 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
438 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
440 nir_validate_shader(b
->shader
, NULL
);
441 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
444 TEST_F(nir_load_store_vectorize_test
, push_const_load_adjacent
)
446 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
447 create_load(nir_var_mem_push_const
, 0, 4, 0x2);
449 nir_validate_shader(b
->shader
, NULL
);
450 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
452 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const
));
454 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 1);
456 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_push_constant
, 0);
457 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
458 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
459 ASSERT_EQ(nir_src_as_uint(load
->src
[0]), 0);
460 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
461 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
464 TEST_F(nir_load_store_vectorize_test
, push_const_load_adjacent_base
)
466 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
467 nir_intrinsic_set_base(create_load(nir_var_mem_push_const
, 0, 0, 0x2), 4);
469 nir_validate_shader(b
->shader
, NULL
);
470 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
472 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const
));
474 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 1);
476 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_push_constant
, 0);
477 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
478 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
479 ASSERT_EQ(nir_src_as_uint(load
->src
[0]), 0);
480 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
481 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
484 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent
)
486 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
487 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
489 nir_validate_shader(b
->shader
, NULL
);
490 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
492 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
494 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
496 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
497 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
498 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
499 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
500 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
501 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
504 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_indirect
)
506 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
507 create_indirect_load(nir_var_mem_ssbo
, 0, index_base
, 0x1);
508 create_indirect_load(nir_var_mem_ssbo
, 0, nir_iadd_imm(b
, index_base
, 4), 0x2);
510 nir_validate_shader(b
->shader
, NULL
);
511 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
513 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
515 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
517 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
518 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
519 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
520 ASSERT_EQ(load
->src
[1].ssa
, index_base
);
521 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
522 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
525 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_indirect_sub
)
527 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
528 nir_ssa_def
*index_base_prev
= nir_iadd_imm(b
, index_base
, 0xfffffffc);
529 create_indirect_load(nir_var_mem_ssbo
, 0, index_base_prev
, 0x1);
530 create_indirect_load(nir_var_mem_ssbo
, 0, index_base
, 0x2);
532 nir_validate_shader(b
->shader
, NULL
);
533 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
535 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
537 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
539 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
540 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
541 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
542 ASSERT_EQ(load
->src
[1].ssa
, index_base_prev
);
543 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
544 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
547 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_indirect_neg_stride
)
549 nir_ssa_def
*inv
= nir_load_local_invocation_index(b
);
550 nir_ssa_def
*inv_plus_one
= nir_iadd_imm(b
, inv
, 1);
551 nir_ssa_def
*index_base
= nir_imul_imm(b
, inv
, 0xfffffffc);
552 nir_ssa_def
*index_base_prev
= nir_imul_imm(b
, inv_plus_one
, 0xfffffffc);
553 create_indirect_load(nir_var_mem_ssbo
, 0, index_base_prev
, 0x1);
554 create_indirect_load(nir_var_mem_ssbo
, 0, index_base
, 0x2);
556 nir_validate_shader(b
->shader
, NULL
);
557 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
559 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
561 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
563 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
564 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
565 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
566 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
567 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
569 /* nir_opt_algebraic optimizes the imul */
570 ASSERT_TRUE(test_alu(load
->src
[1].ssa
->parent_instr
, nir_op_ineg
));
571 nir_ssa_def
*offset
= nir_instr_as_alu(load
->src
[1].ssa
->parent_instr
)->src
[0].src
.ssa
;
572 ASSERT_TRUE(test_alu(offset
->parent_instr
, nir_op_ishl
));
573 nir_alu_instr
*shl
= nir_instr_as_alu(offset
->parent_instr
);
574 ASSERT_EQ(shl
->src
[0].src
.ssa
, inv_plus_one
);
575 ASSERT_EQ(nir_src_as_uint(shl
->src
[1].src
), 2);
578 TEST_F(nir_load_store_vectorize_test
, ssbo_load_identical_store_adjacent
)
580 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
581 create_store(nir_var_mem_ssbo
, 0, 4, 0x2);
582 create_load(nir_var_mem_ssbo
, 0, 0, 0x3);
584 nir_validate_shader(b
->shader
, NULL
);
585 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
587 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
589 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
591 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
592 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
593 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
594 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
595 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
596 EXPECT_INSTR_SWIZZLES(movs
[0x3], load
, "x");
599 TEST_F(nir_load_store_vectorize_test
, ssbo_load_identical_store_intersecting
)
601 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
602 create_store(nir_var_mem_ssbo
, 0, 4, 0x2);
603 create_load(nir_var_mem_ssbo
, 0, 0, 0x3, 32, 2);
605 nir_validate_shader(b
->shader
, NULL
);
606 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
608 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
610 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
613 TEST_F(nir_load_store_vectorize_test
, ssbo_load_identical_store_identical
)
615 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
616 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
617 create_load(nir_var_mem_ssbo
, 0, 0, 0x3);
619 nir_validate_shader(b
->shader
, NULL
);
620 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
622 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
624 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
627 TEST_F(nir_load_store_vectorize_test
, ssbo_store_identical_load_identical
)
629 create_store(nir_var_mem_ssbo
, 0, 0, 0x1);
630 create_load(nir_var_mem_ssbo
, 0, 0, 0x2);
631 create_store(nir_var_mem_ssbo
, 0, 0, 0x3);
633 nir_validate_shader(b
->shader
, NULL
);
634 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
636 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
638 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
641 /* if nir_opt_load_store_vectorize were implemented like many load/store
642 * optimization passes are (for example, nir_opt_combine_stores and
643 * nir_opt_copy_prop_vars) and stopped tracking a load when an aliasing store is
644 * encountered, this case wouldn't be optimized.
645 * A similar test for derefs is shared_load_adjacent_store_identical. */
646 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_store_identical
)
648 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
649 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
650 create_load(nir_var_mem_ssbo
, 0, 4, 0x3);
652 nir_validate_shader(b
->shader
, NULL
);
653 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
654 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
656 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
658 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
659 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
661 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
662 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
663 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
664 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
665 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
666 EXPECT_INSTR_SWIZZLES(movs
[0x3], load
, "y");
669 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent
)
671 create_store(nir_var_mem_ssbo
, 0, 0, 0x1);
672 create_store(nir_var_mem_ssbo
, 0, 4, 0x2);
674 nir_validate_shader(b
->shader
, NULL
);
675 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
677 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
679 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
681 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
682 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
683 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x3);
684 nir_ssa_def
*val
= store
->src
[0].ssa
;
685 ASSERT_EQ(val
->bit_size
, 32);
686 ASSERT_EQ(val
->num_components
, 2);
687 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
688 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
689 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
692 TEST_F(nir_load_store_vectorize_test
, ssbo_store_intersecting
)
694 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
695 create_store(nir_var_mem_ssbo
, 0, 4, 0x2, 32, 2);
697 nir_validate_shader(b
->shader
, NULL
);
698 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
700 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
702 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
704 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
705 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
706 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x7);
707 nir_ssa_def
*val
= store
->src
[0].ssa
;
708 ASSERT_EQ(val
->bit_size
, 32);
709 ASSERT_EQ(val
->num_components
, 3);
710 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
711 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
712 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
713 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x21);
716 TEST_F(nir_load_store_vectorize_test
, ssbo_store_identical
)
718 create_store(nir_var_mem_ssbo
, 0, 0, 0x1);
719 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
721 nir_validate_shader(b
->shader
, NULL
);
722 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
724 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
726 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
728 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
729 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
730 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x1);
731 nir_ssa_def
*val
= store
->src
[0].ssa
;
732 ASSERT_EQ(val
->bit_size
, 32);
733 ASSERT_EQ(val
->num_components
, 1);
734 ASSERT_EQ(nir_src_as_uint(store
->src
[0]), 0x20);
737 TEST_F(nir_load_store_vectorize_test
, ssbo_store_large
)
739 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
740 create_store(nir_var_mem_ssbo
, 0, 8, 0x2, 32, 3);
742 nir_validate_shader(b
->shader
, NULL
);
743 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
745 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
747 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
750 TEST_F(nir_load_store_vectorize_test
, ubo_load_adjacent_memory_barrier
)
752 create_load(nir_var_mem_ubo
, 0, 0, 0x1);
754 nir_scoped_memory_barrier(b
, NIR_SCOPE_DEVICE
, NIR_MEMORY_ACQ_REL
,
757 create_load(nir_var_mem_ubo
, 0, 4, 0x2);
759 nir_validate_shader(b
->shader
, NULL
);
760 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 2);
762 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo
));
764 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo
), 1);
767 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_memory_barrier
)
769 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
771 nir_scoped_memory_barrier(b
, NIR_SCOPE_DEVICE
, NIR_MEMORY_ACQ_REL
,
774 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
776 nir_validate_shader(b
->shader
, NULL
);
777 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
779 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
781 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
784 /* nir_intrinsic_control_barrier only syncs invocations in a workgroup, it
785 * doesn't require that loads/stores complete.
787 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_barrier
)
789 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
790 nir_builder_instr_insert(b
, &nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_control_barrier
)->instr
);
791 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
793 nir_validate_shader(b
->shader
, NULL
);
794 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
796 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
798 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
801 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_memory_barrier_shared
)
803 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
805 nir_scoped_memory_barrier(b
, NIR_SCOPE_WORKGROUP
, NIR_MEMORY_ACQ_REL
,
808 create_load(nir_var_mem_ssbo
, 0, 4, 0x2);
810 nir_validate_shader(b
->shader
, NULL
);
811 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
813 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
815 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
818 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_8_8_16
)
820 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 8);
821 create_load(nir_var_mem_ssbo
, 0, 1, 0x2, 8);
822 create_load(nir_var_mem_ssbo
, 0, 2, 0x3, 16);
824 nir_validate_shader(b
->shader
, NULL
);
825 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 3);
827 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
829 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
831 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
832 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 8);
833 ASSERT_EQ(load
->dest
.ssa
.num_components
, 4);
834 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
835 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
836 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
838 nir_ssa_def
*val
= loads
[0x3]->src
.ssa
;
839 ASSERT_EQ(val
->bit_size
, 16);
840 ASSERT_EQ(val
->num_components
, 1);
841 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_ior
));
842 nir_ssa_def
*low
= nir_instr_as_alu(val
->parent_instr
)->src
[0].src
.ssa
;
843 nir_ssa_def
*high
= nir_instr_as_alu(val
->parent_instr
)->src
[1].src
.ssa
;
844 ASSERT_TRUE(test_alu(high
->parent_instr
, nir_op_ishl
));
845 high
= nir_instr_as_alu(high
->parent_instr
)->src
[0].src
.ssa
;
846 ASSERT_TRUE(test_alu(low
->parent_instr
, nir_op_u2u16
));
847 ASSERT_TRUE(test_alu(high
->parent_instr
, nir_op_u2u16
));
848 ASSERT_TRUE(test_alu_def(low
->parent_instr
, 0, &load
->dest
.ssa
, 2));
849 ASSERT_TRUE(test_alu_def(high
->parent_instr
, 0, &load
->dest
.ssa
, 3));
852 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_32_32_64
)
854 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
855 create_load(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
857 nir_validate_shader(b
->shader
, NULL
);
858 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
860 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
862 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
864 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
865 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
866 ASSERT_EQ(load
->dest
.ssa
.num_components
, 4);
867 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
868 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "xy");
870 nir_ssa_def
*val
= loads
[0x2]->src
.ssa
;
871 ASSERT_EQ(val
->bit_size
, 64);
872 ASSERT_EQ(val
->num_components
, 1);
873 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_pack_64_2x32
));
874 nir_alu_instr
*pack
= nir_instr_as_alu(val
->parent_instr
);
875 EXPECT_INSTR_SWIZZLES(pack
, load
, "zw");
878 TEST_F(nir_load_store_vectorize_test
, ssbo_load_adjacent_32_32_64_64
)
880 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
881 create_load(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
882 create_load(nir_var_mem_ssbo
, 0, 16, 0x3, 64);
884 nir_validate_shader(b
->shader
, NULL
);
885 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 3);
887 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
, true));
889 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
891 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
892 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 64);
893 ASSERT_EQ(load
->dest
.ssa
.num_components
, 3);
894 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
895 EXPECT_INSTR_SWIZZLES(movs
[0x3], load
, "z");
897 nir_ssa_def
*val
= loads
[0x2]->src
.ssa
;
898 ASSERT_EQ(val
->bit_size
, 64);
899 ASSERT_EQ(val
->num_components
, 1);
900 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_mov
));
901 nir_alu_instr
*mov
= nir_instr_as_alu(val
->parent_instr
);
902 EXPECT_INSTR_SWIZZLES(mov
, load
, "y");
904 val
= loads
[0x1]->src
.ssa
;
905 ASSERT_EQ(val
->bit_size
, 32);
906 ASSERT_EQ(val
->num_components
, 2);
907 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_unpack_64_2x32
));
908 nir_alu_instr
*unpack
= nir_instr_as_alu(val
->parent_instr
);
909 EXPECT_INSTR_SWIZZLES(unpack
, load
, "x");
912 TEST_F(nir_load_store_vectorize_test
, ssbo_load_intersecting_32_32_64
)
914 create_load(nir_var_mem_ssbo
, 0, 4, 0x1, 32, 2);
915 create_load(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
917 nir_validate_shader(b
->shader
, NULL
);
918 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
920 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
922 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
924 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
925 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
926 ASSERT_EQ(load
->dest
.ssa
.num_components
, 3);
927 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 4);
928 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "xy");
930 nir_ssa_def
*val
= loads
[0x2]->src
.ssa
;
931 ASSERT_EQ(val
->bit_size
, 64);
932 ASSERT_EQ(val
->num_components
, 1);
933 ASSERT_TRUE(test_alu(val
->parent_instr
, nir_op_pack_64_2x32
));
934 nir_alu_instr
*pack
= nir_instr_as_alu(val
->parent_instr
);
935 EXPECT_INSTR_SWIZZLES(pack
, load
, "yz");
938 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_8_8_16
)
940 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 8);
941 create_store(nir_var_mem_ssbo
, 0, 1, 0x2, 8);
942 create_store(nir_var_mem_ssbo
, 0, 2, 0x3, 16);
944 nir_validate_shader(b
->shader
, NULL
);
945 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 3);
947 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
949 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
951 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
952 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
953 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0xf);
954 nir_ssa_def
*val
= store
->src
[0].ssa
;
955 ASSERT_EQ(val
->bit_size
, 8);
956 ASSERT_EQ(val
->num_components
, 4);
957 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
958 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 8), 0x10);
959 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 8), 0x20);
960 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 8), 0x30);
961 ASSERT_EQ(nir_const_value_as_uint(cv
[3], 8), 0x0);
964 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_32_32_64
)
966 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
967 create_store(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
969 nir_validate_shader(b
->shader
, NULL
);
970 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
972 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
974 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
976 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
977 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
978 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0xf);
979 nir_ssa_def
*val
= store
->src
[0].ssa
;
980 ASSERT_EQ(val
->bit_size
, 32);
981 ASSERT_EQ(val
->num_components
, 4);
982 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
983 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
984 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x11);
985 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x20);
986 ASSERT_EQ(nir_const_value_as_uint(cv
[3], 32), 0x0);
989 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_32_32_64_64
)
991 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
992 create_store(nir_var_mem_ssbo
, 0, 8, 0x2, 64);
993 create_store(nir_var_mem_ssbo
, 0, 16, 0x3, 64);
995 nir_validate_shader(b
->shader
, NULL
);
996 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 3);
998 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1000 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
1002 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
1003 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
1004 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x7);
1005 nir_ssa_def
*val
= store
->src
[0].ssa
;
1006 ASSERT_EQ(val
->bit_size
, 64);
1007 ASSERT_EQ(val
->num_components
, 3);
1008 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1009 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 64), 0x1100000010ull
);
1010 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 64), 0x20);
1011 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 64), 0x30);
1014 TEST_F(nir_load_store_vectorize_test
, ssbo_store_intersecting_32_32_64
)
1016 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 2);
1017 create_store(nir_var_mem_ssbo
, 0, 4, 0x2, 64);
1019 nir_validate_shader(b
->shader
, NULL
);
1020 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1022 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1024 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
1026 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
1027 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
1028 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x7);
1029 nir_ssa_def
*val
= store
->src
[0].ssa
;
1030 ASSERT_EQ(val
->bit_size
, 32);
1031 ASSERT_EQ(val
->num_components
, 3);
1032 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1033 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
1034 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
1035 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x0);
1038 TEST_F(nir_load_store_vectorize_test
, ssbo_store_adjacent_32_64
)
1040 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32);
1041 create_store(nir_var_mem_ssbo
, 0, 4, 0x2, 64, 2);
1043 nir_validate_shader(b
->shader
, NULL
);
1044 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1046 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1048 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1051 TEST_F(nir_load_store_vectorize_test
, ssbo_store_identical_wrmask
)
1053 create_store(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 4, 1 | 4);
1054 create_store(nir_var_mem_ssbo
, 0, 0, 0x2, 32, 4, 2 | 4 | 8);
1056 nir_validate_shader(b
->shader
, NULL
);
1057 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 2);
1059 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1061 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo
), 1);
1063 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_ssbo
, 0);
1064 ASSERT_EQ(nir_src_as_uint(store
->src
[2]), 0);
1065 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0xf);
1066 nir_ssa_def
*val
= store
->src
[0].ssa
;
1067 ASSERT_EQ(val
->bit_size
, 32);
1068 ASSERT_EQ(val
->num_components
, 4);
1069 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1070 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
1071 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x21);
1072 ASSERT_EQ(nir_const_value_as_uint(cv
[2], 32), 0x22);
1073 ASSERT_EQ(nir_const_value_as_uint(cv
[3], 32), 0x23);
1076 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent
)
1078 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1079 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1081 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1082 create_shared_load(nir_build_deref_array_imm(b
, deref
, 1), 0x2);
1084 nir_validate_shader(b
->shader
, NULL
);
1085 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1087 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1089 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1091 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1092 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1093 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1095 deref
= nir_src_as_deref(load
->src
[0]);
1096 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1098 deref
= nir_deref_instr_parent(deref
);
1099 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1100 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1102 deref
= nir_deref_instr_parent(deref
);
1103 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1104 ASSERT_EQ(deref
->var
, var
);
1106 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1107 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
1110 TEST_F(nir_load_store_vectorize_test
, shared_load_distant_64bit
)
1112 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1113 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1114 nir_ssa_dest_init(&deref
->instr
, &deref
->dest
, 1, 64, NULL
);
1116 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0x100000000), 0x1);
1117 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0x200000001), 0x2);
1119 nir_validate_shader(b
->shader
, NULL
);
1120 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1122 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared
));
1124 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1127 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent_indirect
)
1129 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1130 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1131 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1133 create_shared_load(nir_build_deref_array(b
, deref
, index_base
), 0x1);
1134 create_shared_load(nir_build_deref_array(b
, deref
, nir_iadd_imm(b
, index_base
, 1)), 0x2);
1136 nir_validate_shader(b
->shader
, NULL
);
1137 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1139 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1141 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1143 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1144 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1145 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1147 deref
= nir_src_as_deref(load
->src
[0]);
1148 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1150 deref
= nir_deref_instr_parent(deref
);
1151 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1152 ASSERT_EQ(deref
->arr
.index
.ssa
, index_base
);
1154 deref
= nir_deref_instr_parent(deref
);
1155 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1156 ASSERT_EQ(deref
->var
, var
);
1158 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1159 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
1162 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent_indirect_sub
)
1164 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1165 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1166 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1167 nir_ssa_def
*index_base_prev
= nir_iadd_imm(b
, index_base
, 0xffffffff);
1169 create_shared_load(nir_build_deref_array(b
, deref
, index_base_prev
), 0x1);
1170 create_shared_load(nir_build_deref_array(b
, deref
, index_base
), 0x2);
1172 nir_validate_shader(b
->shader
, NULL
);
1173 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1175 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1177 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1179 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1180 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1181 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1183 deref
= nir_src_as_deref(load
->src
[0]);
1184 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1186 deref
= nir_deref_instr_parent(deref
);
1187 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1188 ASSERT_EQ(deref
->arr
.index
.ssa
, index_base_prev
);
1190 deref
= nir_deref_instr_parent(deref
);
1191 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1192 ASSERT_EQ(deref
->var
, var
);
1194 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1195 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
1198 TEST_F(nir_load_store_vectorize_test
, shared_load_struct
)
1200 glsl_struct_field fields
[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1201 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1203 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_struct_type(fields
, 2, "Struct", false), "var");
1204 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1206 create_shared_load(nir_build_deref_struct(b
, deref
, 0), 0x1);
1207 create_shared_load(nir_build_deref_array_imm(b
, nir_build_deref_struct(b
, deref
, 1), 0), 0x2);
1209 nir_validate_shader(b
->shader
, NULL
);
1210 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1212 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1214 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1216 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1217 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1218 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1220 deref
= nir_src_as_deref(load
->src
[0]);
1221 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1223 deref
= nir_deref_instr_parent(deref
);
1224 ASSERT_EQ(deref
->deref_type
, nir_deref_type_struct
);
1225 ASSERT_EQ(deref
->strct
.index
, 0);
1227 deref
= nir_deref_instr_parent(deref
);
1228 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1229 ASSERT_EQ(deref
->var
, var
);
1231 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1232 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
1235 TEST_F(nir_load_store_vectorize_test
, shared_load_identical_store_adjacent
)
1237 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1238 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1240 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1241 create_shared_store(nir_build_deref_array_imm(b
, deref
, 1), 0x2);
1242 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x3);
1244 nir_validate_shader(b
->shader
, NULL
);
1245 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1246 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1248 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1250 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1251 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1253 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1254 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1255 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1257 deref
= nir_src_as_deref(load
->src
[0]);
1258 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1259 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1261 deref
= nir_deref_instr_parent(deref
);
1262 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1263 ASSERT_EQ(deref
->var
, var
);
1265 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1266 EXPECT_INSTR_SWIZZLES(movs
[0x3], load
, "x");
1269 TEST_F(nir_load_store_vectorize_test
, shared_load_identical_store_identical
)
1271 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1272 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1274 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1275 create_shared_store(nir_build_deref_array_imm(b
, deref
, 0), 0x2);
1276 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x3);
1278 nir_validate_shader(b
->shader
, NULL
);
1279 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1281 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared
));
1283 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1286 TEST_F(nir_load_store_vectorize_test
, shared_load_adjacent_store_identical
)
1288 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1289 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1291 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1292 create_shared_store(nir_build_deref_array_imm(b
, deref
, 0), 0x2);
1293 create_shared_load(nir_build_deref_array_imm(b
, deref
, 1), 0x3);
1295 nir_validate_shader(b
->shader
, NULL
);
1296 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1297 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1299 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1301 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1302 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1304 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1305 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1306 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1308 deref
= nir_src_as_deref(load
->src
[0]);
1309 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1311 deref
= nir_deref_instr_parent(deref
);
1312 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1313 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1315 deref
= nir_deref_instr_parent(deref
);
1316 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1317 ASSERT_EQ(deref
->var
, var
);
1319 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1320 EXPECT_INSTR_SWIZZLES(movs
[0x3], load
, "y");
1323 TEST_F(nir_load_store_vectorize_test
, shared_load_bool
)
1325 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1326 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1328 create_shared_load(nir_build_deref_array_imm(b
, deref
, 0), 0x1, 1);
1329 create_shared_load(nir_build_deref_array_imm(b
, deref
, 1), 0x2, 1);
1331 nir_validate_shader(b
->shader
, NULL
);
1332 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1334 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1336 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1338 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1339 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1340 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1342 deref
= nir_src_as_deref(load
->src
[0]);
1343 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1345 deref
= nir_deref_instr_parent(deref
);
1346 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1347 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1349 deref
= nir_deref_instr_parent(deref
);
1350 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1351 ASSERT_EQ(deref
->var
, var
);
1353 ASSERT_TRUE(test_alu(loads
[0x1]->src
.ssa
->parent_instr
, nir_op_i2b1
));
1354 ASSERT_TRUE(test_alu(loads
[0x2]->src
.ssa
->parent_instr
, nir_op_i2b1
));
1355 ASSERT_TRUE(test_alu_def(loads
[0x1]->src
.ssa
->parent_instr
, 0, &load
->dest
.ssa
, 0));
1356 ASSERT_TRUE(test_alu_def(loads
[0x2]->src
.ssa
->parent_instr
, 0, &load
->dest
.ssa
, 1));
1359 TEST_F(nir_load_store_vectorize_test
, shared_load_bool_mixed
)
1361 glsl_struct_field fields
[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1362 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1364 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_struct_type(fields
, 2, "Struct", false), "var");
1365 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1367 create_shared_load(nir_build_deref_struct(b
, deref
, 0), 0x1, 1);
1368 create_shared_load(nir_build_deref_array_imm(b
, nir_build_deref_struct(b
, deref
, 1), 0), 0x2);
1370 nir_validate_shader(b
->shader
, NULL
);
1371 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1373 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1375 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1377 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1378 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1379 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1381 deref
= nir_src_as_deref(load
->src
[0]);
1382 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1384 deref
= nir_deref_instr_parent(deref
);
1385 ASSERT_EQ(deref
->deref_type
, nir_deref_type_struct
);
1386 ASSERT_EQ(deref
->strct
.index
, 0);
1388 deref
= nir_deref_instr_parent(deref
);
1389 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1390 ASSERT_EQ(deref
->var
, var
);
1392 ASSERT_TRUE(test_alu(loads
[0x1]->src
.ssa
->parent_instr
, nir_op_i2b1
));
1393 ASSERT_TRUE(test_alu_def(loads
[0x1]->src
.ssa
->parent_instr
, 0, &load
->dest
.ssa
, 0));
1395 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
1398 TEST_F(nir_load_store_vectorize_test
, shared_store_adjacent
)
1400 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1401 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1403 create_shared_store(nir_build_deref_array_imm(b
, deref
, 0), 0x1);
1404 create_shared_store(nir_build_deref_array_imm(b
, deref
, 1), 0x2);
1406 nir_validate_shader(b
->shader
, NULL
);
1407 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 2);
1409 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1411 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref
), 1);
1413 nir_intrinsic_instr
*store
= get_intrinsic(nir_intrinsic_store_deref
, 0);
1414 ASSERT_EQ(nir_intrinsic_write_mask(store
), 0x3);
1415 nir_ssa_def
*val
= store
->src
[1].ssa
;
1416 ASSERT_EQ(val
->bit_size
, 32);
1417 ASSERT_EQ(val
->num_components
, 2);
1418 nir_const_value
*cv
= nir_instr_as_load_const(val
->parent_instr
)->value
;
1419 ASSERT_EQ(nir_const_value_as_uint(cv
[0], 32), 0x10);
1420 ASSERT_EQ(nir_const_value_as_uint(cv
[1], 32), 0x20);
1422 deref
= nir_src_as_deref(store
->src
[0]);
1423 ASSERT_EQ(deref
->deref_type
, nir_deref_type_cast
);
1425 deref
= nir_deref_instr_parent(deref
);
1426 ASSERT_EQ(deref
->deref_type
, nir_deref_type_array
);
1427 ASSERT_EQ(nir_src_as_uint(deref
->arr
.index
), 0);
1429 deref
= nir_deref_instr_parent(deref
);
1430 ASSERT_EQ(deref
->deref_type
, nir_deref_type_var
);
1431 ASSERT_EQ(deref
->var
, var
);
1434 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_base
)
1436 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
1437 nir_intrinsic_set_base(create_load(nir_var_mem_push_const
, 0, 4, 0x2), 4);
1439 nir_validate_shader(b
->shader
, NULL
);
1440 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1442 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1444 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1447 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_direct_direct
)
1449 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
1450 create_load(nir_var_mem_push_const
, 0, 8, 0x2);
1452 nir_validate_shader(b
->shader
, NULL
);
1453 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1455 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1457 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1460 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_direct_indirect
)
1462 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1463 create_load(nir_var_mem_push_const
, 0, 0, 0x1);
1464 create_indirect_load(nir_var_mem_push_const
, 0, index_base
, 0x2);
1466 nir_validate_shader(b
->shader
, NULL
);
1467 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1469 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1471 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1474 TEST_F(nir_load_store_vectorize_test
, push_const_load_separate_indirect_indirect
)
1476 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1477 create_indirect_load(nir_var_mem_push_const
, 0,
1478 nir_iadd(b
, nir_imul(b
, nir_iadd(b
, index_base
, nir_imm_int(b
, 2)), nir_imm_int(b
, 16)), nir_imm_int(b
, 32)), 0x1);
1479 create_indirect_load(nir_var_mem_push_const
, 0,
1480 nir_iadd(b
, nir_imul(b
, nir_iadd(b
, index_base
, nir_imm_int(b
, 3)), nir_imm_int(b
, 16)), nir_imm_int(b
, 32)), 0x2);
1482 nir_validate_shader(b
->shader
, NULL
);
1483 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1485 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const
));
1487 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1490 TEST_F(nir_load_store_vectorize_test
, push_const_load_adjacent_complex_indirect
)
1492 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1493 //vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
1494 nir_ssa_def
*low
= nir_iadd(b
, nir_imul(b
, index_base
, nir_imm_int(b
, 16)), nir_imm_int(b
, 12));
1495 nir_ssa_def
*high
= nir_imul(b
, nir_iadd(b
, index_base
, nir_imm_int(b
, 1)), nir_imm_int(b
, 16));
1496 create_indirect_load(nir_var_mem_push_const
, 0, low
, 0x1);
1497 create_indirect_load(nir_var_mem_push_const
, 0, high
, 0x2);
1499 nir_validate_shader(b
->shader
, NULL
);
1500 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 2);
1502 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const
));
1504 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant
), 1);
1506 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_push_constant
, 0);
1507 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1508 ASSERT_EQ(load
->dest
.ssa
.num_components
, 2);
1509 ASSERT_EQ(load
->src
[0].ssa
, low
);
1510 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1511 EXPECT_INSTR_SWIZZLES(movs
[0x2], load
, "y");
1514 TEST_F(nir_load_store_vectorize_test
, ssbo_alias0
)
1516 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1517 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
1518 create_indirect_store(nir_var_mem_ssbo
, 0, index_base
, 0x2);
1519 create_load(nir_var_mem_ssbo
, 0, 0, 0x3);
1521 nir_validate_shader(b
->shader
, NULL
);
1522 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1524 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1526 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1529 TEST_F(nir_load_store_vectorize_test
, ssbo_alias1
)
1531 nir_ssa_def
*load_base
= nir_load_global_invocation_index(b
, 32);
1532 nir_ssa_def
*store_base
= nir_load_local_invocation_index(b
);
1533 create_indirect_load(nir_var_mem_ssbo
, 0, load_base
, 0x1);
1534 create_indirect_store(nir_var_mem_ssbo
, 0, store_base
, 0x2);
1535 create_indirect_load(nir_var_mem_ssbo
, 0, load_base
, 0x3);
1537 nir_validate_shader(b
->shader
, NULL
);
1538 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1540 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo
));
1542 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1545 TEST_F(nir_load_store_vectorize_test
, DISABLED_ssbo_alias2
)
1547 /* TODO: try to combine these loads */
1548 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1549 nir_ssa_def
*offset
= nir_iadd(b
, nir_imul(b
, index_base
, nir_imm_int(b
, 16)), nir_imm_int(b
, 4));
1550 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x1);
1551 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
1552 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x3);
1554 nir_validate_shader(b
->shader
, NULL
);
1555 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1557 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1559 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
1561 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
1562 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1563 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1564 ASSERT_EQ(load
->src
[1].ssa
, offset
);
1565 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1566 EXPECT_INSTR_SWIZZLES(movs
[0x3], load
, "x");
1569 TEST_F(nir_load_store_vectorize_test
, ssbo_alias3
)
1571 /* these loads can be combined if nir_alu_instr::no_unsigned_wrap is set.
1572 * these loads can't be combined because if index_base == 268435455, then
1573 * offset == 0 because the addition would wrap around */
1574 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1575 nir_ssa_def
*offset
= nir_iadd(b
, nir_imul(b
, index_base
, nir_imm_int(b
, 16)), nir_imm_int(b
, 16));
1576 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x1);
1577 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
1578 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x3);
1580 nir_validate_shader(b
->shader
, NULL
);
1581 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1583 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1585 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1588 TEST_F(nir_load_store_vectorize_test
, DISABLED_ssbo_alias4
)
1590 /* TODO: try to combine these loads */
1591 nir_ssa_def
*index_base
= nir_load_local_invocation_index(b
);
1592 nir_ssa_def
*offset
= nir_iadd(b
, nir_imul(b
, index_base
, nir_imm_int(b
, 16)), nir_imm_int(b
, 16));
1593 nir_instr_as_alu(offset
->parent_instr
)->no_unsigned_wrap
= true;
1594 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x1);
1595 create_store(nir_var_mem_ssbo
, 0, 0, 0x2);
1596 create_indirect_load(nir_var_mem_ssbo
, 0, offset
, 0x3);
1598 nir_validate_shader(b
->shader
, NULL
);
1599 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1601 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1603 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
1605 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
1606 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1607 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1608 ASSERT_EQ(load
->src
[1].ssa
, offset
);
1609 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1610 EXPECT_INSTR_SWIZZLES(movs
[0x3], load
, "x");
1613 TEST_F(nir_load_store_vectorize_test
, ssbo_alias5
)
1615 create_load(nir_var_mem_ssbo
, 0, 0, 0x1);
1616 create_store(nir_var_mem_ssbo
, 1, 0, 0x2);
1617 create_load(nir_var_mem_ssbo
, 0, 0, 0x3);
1619 nir_validate_shader(b
->shader
, NULL
);
1620 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1622 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1624 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1627 TEST_F(nir_load_store_vectorize_test
, ssbo_alias6
)
1629 create_load(nir_var_mem_ssbo
, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT
);
1630 create_store(nir_var_mem_ssbo
, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT
);
1631 create_load(nir_var_mem_ssbo
, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT
);
1633 nir_validate_shader(b
->shader
, NULL
);
1634 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1636 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1638 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 1);
1640 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_ssbo
, 0);
1641 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1642 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1643 ASSERT_EQ(nir_src_as_uint(load
->src
[1]), 0);
1644 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1645 EXPECT_INSTR_SWIZZLES(movs
[0x3], load
, "x");
1648 TEST_F(nir_load_store_vectorize_test
, DISABLED_shared_alias0
)
1650 /* TODO: implement type-based alias analysis so that these loads can be
1651 * combined. this is made a bit more difficult than simply using
1652 * nir_compare_derefs() because the vectorizer creates loads/stores with
1653 * casted derefs. The solution would probably be to keep multiple derefs for
1654 * an entry (one for each load/store combined into it). */
1655 glsl_struct_field fields
[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1656 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1658 nir_variable
*var
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_struct_type(fields
, 2, "Struct", false), "var");
1659 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
1661 nir_ssa_def
*index0
= nir_load_local_invocation_index(b
);
1662 nir_ssa_def
*index1
= nir_load_global_invocation_index(b
, 32);
1663 nir_deref_instr
*load_deref
= nir_build_deref_array(b
, nir_build_deref_struct(b
, deref
, 0), index0
);
1665 create_shared_load(load_deref
, 0x1);
1666 create_shared_store(nir_build_deref_array(b
, nir_build_deref_struct(b
, deref
, 1), index1
), 0x2);
1667 create_shared_load(load_deref
, 0x3);
1669 nir_validate_shader(b
->shader
, NULL
);
1670 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1672 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1674 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1676 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1677 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1678 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1679 ASSERT_EQ(load
->src
[0].ssa
, &load_deref
->dest
.ssa
);
1680 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1681 EXPECT_INSTR_SWIZZLES(movs
[0x3], load
, "x");
1684 TEST_F(nir_load_store_vectorize_test
, shared_alias1
)
1686 nir_variable
*var0
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_uint_type(), "var0");
1687 nir_variable
*var1
= nir_variable_create(b
->shader
, nir_var_mem_shared
, glsl_uint_type(), "var1");
1688 nir_deref_instr
*load_deref
= nir_build_deref_var(b
, var0
);
1690 create_shared_load(load_deref
, 0x1);
1691 create_shared_store(nir_build_deref_var(b
, var1
), 0x2);
1692 create_shared_load(load_deref
, 0x3);
1694 nir_validate_shader(b
->shader
, NULL
);
1695 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 2);
1697 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared
));
1699 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref
), 1);
1701 nir_intrinsic_instr
*load
= get_intrinsic(nir_intrinsic_load_deref
, 0);
1702 ASSERT_EQ(load
->dest
.ssa
.bit_size
, 32);
1703 ASSERT_EQ(load
->dest
.ssa
.num_components
, 1);
1704 ASSERT_EQ(load
->src
[0].ssa
, &load_deref
->dest
.ssa
);
1705 EXPECT_INSTR_SWIZZLES(movs
[0x1], load
, "x");
1706 EXPECT_INSTR_SWIZZLES(movs
[0x3], load
, "x");
1709 TEST_F(nir_load_store_vectorize_test
, ssbo_load_distant_64bit
)
1711 create_indirect_load(nir_var_mem_ssbo
, 0, nir_imm_intN_t(b
, 0x100000000, 64), 0x1);
1712 create_indirect_load(nir_var_mem_ssbo
, 0, nir_imm_intN_t(b
, 0x200000004, 64), 0x2);
1714 nir_validate_shader(b
->shader
, NULL
);
1715 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1717 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1719 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1722 TEST_F(nir_load_store_vectorize_test
, ssbo_load_distant_indirect_64bit
)
1724 nir_ssa_def
*index_base
= nir_u2u64(b
, nir_load_local_invocation_index(b
));
1725 nir_ssa_def
*first
= nir_imul_imm(b
, index_base
, 0x100000000);
1726 nir_ssa_def
*second
= nir_imul_imm(b
, index_base
, 0x200000000);
1727 create_indirect_load(nir_var_mem_ssbo
, 0, first
, 0x1);
1728 create_indirect_load(nir_var_mem_ssbo
, 0, second
, 0x2);
1730 nir_validate_shader(b
->shader
, NULL
);
1731 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1733 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
));
1735 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1738 TEST_F(nir_load_store_vectorize_test
, ssbo_offset_overflow_robust
)
1740 create_load(nir_var_mem_ssbo
, 0, 0xfffffffc, 0x1);
1741 create_load(nir_var_mem_ssbo
, 0, 0x0, 0x2);
1743 nir_validate_shader(b
->shader
, NULL
);
1744 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);
1746 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo
, false, nir_var_mem_ssbo
));
1748 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo
), 2);