1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include <gtest/gtest.h>
25
26 #include "nir.h"
27 #include "nir_builder.h"
28
29 namespace {
30
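/* Test fixture for nir_opt_load_store_vectorize: it builds a compute shader
 * with nir_builder, provides helpers that emit id-tagged loads and stores for
 * the various memory modes, and lets each test run the vectorizer and inspect
 * the resulting intrinsics. */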
31 class nir_load_store_vectorize_test : public ::testing::Test {
32 protected:
33 nir_load_store_vectorize_test();
34 ~nir_load_store_vectorize_test();
35
36 unsigned count_intrinsics(nir_intrinsic_op intrinsic);
37
38 nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
39 unsigned index);
40
41 bool run_vectorizer(nir_variable_mode modes, bool cse=false);
42
43 nir_ssa_def *get_resource(uint32_t binding, bool ssbo);
44
45 nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
46 uint32_t id, unsigned bit_size=32, unsigned components=1,
47 unsigned access=0);
48 void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
49 uint32_t id, unsigned bit_size=32, unsigned components=1,
50 unsigned wrmask=0xf, unsigned access=0);
51
52 nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
53 uint32_t id, unsigned bit_size=32, unsigned components=1,
54 unsigned access=0);
55 void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
56 uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
57 unsigned access=0);
58
59 void create_shared_load(nir_deref_instr *deref, uint32_t id,
60 unsigned bit_size=32, unsigned components=1);
61 void create_shared_store(nir_deref_instr *deref, uint32_t id,
62 unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);
63
64 bool test_alu(nir_instr *instr, nir_op op);
65 bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);
66
67 static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
68 unsigned num_components, unsigned high_offset,
69 nir_intrinsic_instr *low, nir_intrinsic_instr *high);
70 static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);
71
72 void *mem_ctx;
73
74 nir_builder *b;
75 std::map<unsigned, nir_alu_src*> loads;
76 std::map<unsigned, nir_ssa_def*> res_map;
77 };
78
79 nir_load_store_vectorize_test::nir_load_store_vectorize_test()
80 {
81 glsl_type_singleton_init_or_ref();
82
83 mem_ctx = ralloc_context(NULL);
84 static const nir_shader_compiler_options options = { };
85 b = rzalloc(mem_ctx, nir_builder);
86 nir_builder_init_simple_shader(b, mem_ctx, MESA_SHADER_COMPUTE, &options);
87 }
88
89 nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
90 {
91 if (HasFailure()) {
92 printf("\nShader from the failed test:\n\n");
93 nir_print_shader(b->shader, stdout);
94 }
95
96 ralloc_free(mem_ctx);
97
98 glsl_type_singleton_decref();
99 }
100
101 unsigned
102 nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
103 {
104 unsigned count = 0;
105 nir_foreach_block(block, b->impl) {
106 nir_foreach_instr(instr, block) {
107 if (instr->type != nir_instr_type_intrinsic)
108 continue;
109 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
110 if (intrin->intrinsic == intrinsic)
111 count++;
112 }
113 }
114 return count;
115 }
116
117 nir_intrinsic_instr *
118 nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
119 unsigned index)
120 {
121 nir_foreach_block(block, b->impl) {
122 nir_foreach_instr(instr, block) {
123 if (instr->type != nir_instr_type_intrinsic)
124 continue;
125 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
126 if (intrin->intrinsic == intrinsic) {
127 if (index == 0)
128 return intrin;
129 index--;
130 }
131 }
132 }
133 return NULL;
134 }
135
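/* Lowers shared variables to explicit types when requested, runs
 * nir_opt_load_store_vectorize and, on progress, validates the shader and
 * runs the cleanup passes (optional CSE, copy propagation, nir_opt_algebraic,
 * constant folding) so the tests can pattern-match a simplified shader. */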
136 bool
137 nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes, bool cse)
138 {
139 if (modes & nir_var_mem_shared)
140 nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
141 bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback);
142 if (progress) {
143 nir_validate_shader(b->shader, NULL);
144 if (cse)
145 nir_opt_cse(b->shader);
146 nir_copy_prop(b->shader);
147 nir_opt_algebraic(b->shader);
148 nir_opt_constant_folding(b->shader);
149 }
150 return progress;
151 }
152
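/* Returns a vulkan_resource_index for the given binding, cached in res_map so
 * that repeated accesses to the same binding share one resource handle. */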
153 nir_ssa_def *
154 nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
155 {
156 if (res_map.count(binding))
157 return res_map[binding];
158
159 nir_intrinsic_instr *res = nir_intrinsic_instr_create(
160 b->shader, nir_intrinsic_vulkan_resource_index);
161 nir_ssa_dest_init(&res->instr, &res->dest, 1, 32, NULL);
162 res->num_components = 1;
163 res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
164 nir_intrinsic_set_desc_type(
165 res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
166 nir_intrinsic_set_desc_set(res, 0);
167 nir_intrinsic_set_binding(res, binding);
168 nir_builder_instr_insert(b, &res->instr);
169 res_map[binding] = &res->dest.ssa;
170 return &res->dest.ssa;
171 }
172
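/* Emits a load_ubo/load_ssbo/load_push_constant of the requested size and
 * wraps its result in a nir_mov; that mov's ALU source is recorded in
 * loads[id] so tests can later check which components of a vectorized load
 * the original load ended up reading. */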
173 nir_intrinsic_instr *
174 nir_load_store_vectorize_test::create_indirect_load(
175 nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
176 unsigned bit_size, unsigned components, unsigned access)
177 {
178 nir_intrinsic_op intrinsic;
179 nir_ssa_def *res = NULL;
180 switch (mode) {
181 case nir_var_mem_ubo:
182 intrinsic = nir_intrinsic_load_ubo;
183 res = get_resource(binding, false);
184 break;
185 case nir_var_mem_ssbo:
186 intrinsic = nir_intrinsic_load_ssbo;
187 res = get_resource(binding, true);
188 break;
189 case nir_var_mem_push_const:
190 intrinsic = nir_intrinsic_load_push_constant;
191 break;
192 default:
193 return NULL;
194 }
195 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
196 nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
197 load->num_components = components;
198 if (res) {
199 load->src[0] = nir_src_for_ssa(res);
200 load->src[1] = nir_src_for_ssa(offset);
201 } else {
202 load->src[0] = nir_src_for_ssa(offset);
203 }
204 if (mode != nir_var_mem_push_const) {
205 nir_intrinsic_set_align(load, (bit_size == 1 ? 32 : bit_size) / 8, 0);
206 nir_intrinsic_set_access(load, (gl_access_qualifier)access);
207 }
208 nir_builder_instr_insert(b, &load->instr);
209 nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr;
210 loads[id] = &nir_instr_as_alu(mov)->src[0];
211
212 return load;
213 }
214
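/* Emits a store whose value is an immediate vector with component i equal to
 * (id << 4) | i, so the constants of a combined store identify which original
 * store and component they came from. */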
215 void
216 nir_load_store_vectorize_test::create_indirect_store(
217 nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
218 unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
219 {
220 nir_const_value values[NIR_MAX_VEC_COMPONENTS];
221 for (unsigned i = 0; i < components; i++)
222 values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
223 nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);
224
225 nir_intrinsic_op intrinsic;
226 nir_ssa_def *res = NULL;
227 switch (mode) {
228 case nir_var_mem_ssbo:
229 intrinsic = nir_intrinsic_store_ssbo;
230 res = get_resource(binding, true);
231 break;
232 case nir_var_mem_shared:
233 intrinsic = nir_intrinsic_store_shared;
234 break;
235 default:
236 return;
237 }
238 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
239 nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
240 store->num_components = components;
241 if (res) {
242 store->src[0] = nir_src_for_ssa(value);
243 store->src[1] = nir_src_for_ssa(res);
244 store->src[2] = nir_src_for_ssa(offset);
245 } else {
246 store->src[0] = nir_src_for_ssa(value);
247 store->src[1] = nir_src_for_ssa(offset);
248 }
249 nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
250 nir_intrinsic_set_access(store, (gl_access_qualifier)access);
251 nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
252 nir_builder_instr_insert(b, &store->instr);
253 }
254
255 nir_intrinsic_instr *
256 nir_load_store_vectorize_test::create_load(
257 nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
258 unsigned bit_size, unsigned components, unsigned access)
259 {
260 return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
261 }
262
263 void
264 nir_load_store_vectorize_test::create_store(
265 nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
266 unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
267 {
268 create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
269 }
270
271 void nir_load_store_vectorize_test::create_shared_load(
272 nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
273 {
274 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
275 nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
276 load->num_components = components;
277 load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
278 nir_builder_instr_insert(b, &load->instr);
279 nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr;
280 loads[id] = &nir_instr_as_alu(mov)->src[0];
281 }
282
283 void nir_load_store_vectorize_test::create_shared_store(
284 nir_deref_instr *deref, uint32_t id,
285 unsigned bit_size, unsigned components, unsigned wrmask)
286 {
287 nir_const_value values[NIR_MAX_VEC_COMPONENTS];
288 for (unsigned i = 0; i < components; i++)
289 values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
290 nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);
291
292 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_deref);
293 nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
294 store->num_components = components;
295 store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
296 store->src[1] = nir_src_for_ssa(value);
297 nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
298 nir_builder_instr_insert(b, &store->instr);
299 }
300
301 bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
302 {
303 return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
304 }
305
306 bool nir_load_store_vectorize_test::test_alu_def(
307 nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
308 {
309 if (instr->type != nir_instr_type_alu)
310 return false;
311
312 nir_alu_instr *alu = nir_instr_as_alu(instr);
313
314 if (index >= nir_op_infos[alu->op].num_inputs)
315 return false;
316 if (alu->src[index].src.ssa != def)
317 return false;
318 if (alu->src[index].swizzle[0] != swizzle)
319 return false;
320
321 return true;
322 }
323
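/* Always allow vectorization: bit_size / 8 is nonzero for every bit size used
 * in these tests, so it converts to true. */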
324 bool nir_load_store_vectorize_test::mem_vectorize_callback(
325 unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
326 nir_intrinsic_instr *low, nir_intrinsic_instr *high)
327 {
328 return bit_size / 8;
329 }
330
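/* Type callback for nir_lower_vars_to_explicit_types: booleans occupy four
 * bytes, other scalars bit_size / 8 bytes; a vector's size is the component
 * size times the element count and its alignment is the component size. */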
331 void nir_load_store_vectorize_test::shared_type_info(
332 const struct glsl_type *type, unsigned *size, unsigned *align)
333 {
334 assert(glsl_type_is_vector_or_scalar(type));
335
336 uint32_t comp_size = glsl_type_is_boolean(type)
337 ? 4 : glsl_get_bit_size(type) / 8;
338 unsigned length = glsl_get_vector_elements(type);
339 *size = comp_size * length;
340 *align = comp_size;
341 }
342 } // namespace
343
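/* Each test emits a handful of loads/stores, runs the vectorizer for the
 * relevant variable mode and then checks the remaining intrinsic count plus
 * how the original uses map onto the combined access (swizzles, write masks,
 * constant values). */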
344 TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
345 {
346 create_load(nir_var_mem_ubo, 0, 0, 0x1);
347 create_load(nir_var_mem_ubo, 0, 4, 0x2);
348
349 nir_validate_shader(b->shader, NULL);
350 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
351
352 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
353
354 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
355
356 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
357 ASSERT_EQ(load->dest.ssa.bit_size, 32);
358 ASSERT_EQ(load->dest.ssa.num_components, 2);
359 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
360 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
361 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
362 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
363 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
364 }
365
366 TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
367 {
368 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
369 create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);
370
371 nir_validate_shader(b->shader, NULL);
372 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
373
374 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
375
376 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
377
378 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
379 ASSERT_EQ(load->dest.ssa.bit_size, 32);
380 ASSERT_EQ(load->dest.ssa.num_components, 3);
381 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
382 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
383 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
384 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
385 ASSERT_EQ(loads[0x1]->swizzle[1], 1);
386 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
387 ASSERT_EQ(loads[0x2]->swizzle[1], 2);
388 }
389
390 TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
391 {
392 create_load(nir_var_mem_ubo, 0, 0, 0x1);
393 create_load(nir_var_mem_ubo, 0, 0, 0x2);
394
395 nir_validate_shader(b->shader, NULL);
396 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
397
398 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
399
400 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
401
402 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
403 ASSERT_EQ(load->dest.ssa.bit_size, 32);
404 ASSERT_EQ(load->dest.ssa.num_components, 1);
405 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
406 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
407 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
408 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
409 ASSERT_EQ(loads[0x2]->swizzle[0], 0);
410 }
411
412 TEST_F(nir_load_store_vectorize_test, ubo_load_large)
413 {
414 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
415 create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);
416
417 nir_validate_shader(b->shader, NULL);
418 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
419
420 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
421
422 nir_validate_shader(b->shader, NULL);
423 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
424 }
425
426 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
427 {
428 create_load(nir_var_mem_push_const, 0, 0, 0x1);
429 create_load(nir_var_mem_push_const, 0, 4, 0x2);
430
431 nir_validate_shader(b->shader, NULL);
432 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
433
434 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
435
436 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
437
438 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
439 ASSERT_EQ(load->dest.ssa.bit_size, 32);
440 ASSERT_EQ(load->dest.ssa.num_components, 2);
441 ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
442 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
443 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
444 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
445 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
446 }
447
448 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
449 {
450 create_load(nir_var_mem_push_const, 0, 0, 0x1);
451 nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);
452
453 nir_validate_shader(b->shader, NULL);
454 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
455
456 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
457
458 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
459
460 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
461 ASSERT_EQ(load->dest.ssa.bit_size, 32);
462 ASSERT_EQ(load->dest.ssa.num_components, 2);
463 ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
464 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
465 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
466 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
467 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
468 }
469
470 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
471 {
472 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
473 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
474
475 nir_validate_shader(b->shader, NULL);
476 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
477
478 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
479
480 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
481
482 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
483 ASSERT_EQ(load->dest.ssa.bit_size, 32);
484 ASSERT_EQ(load->dest.ssa.num_components, 2);
485 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
486 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
487 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
488 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
489 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
490 }
491
492 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
493 {
494 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
495 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
496 create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);
497
498 nir_validate_shader(b->shader, NULL);
499 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
500
501 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
502
503 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
504
505 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
506 ASSERT_EQ(load->dest.ssa.bit_size, 32);
507 ASSERT_EQ(load->dest.ssa.num_components, 2);
508 ASSERT_EQ(load->src[1].ssa, index_base);
509 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
510 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
511 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
512 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
513 }
514
515 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
516 {
517 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
518 nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
519 create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
520 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);
521
522 nir_validate_shader(b->shader, NULL);
523 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
524
525 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
526
527 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
528
529 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
530 ASSERT_EQ(load->dest.ssa.bit_size, 32);
531 ASSERT_EQ(load->dest.ssa.num_components, 2);
532 ASSERT_EQ(load->src[1].ssa, index_base_prev);
533 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
534 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
535 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
536 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
537 }
538
539 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
540 {
541 nir_ssa_def *inv = nir_load_local_invocation_index(b);
542 nir_ssa_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
543 nir_ssa_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
544 nir_ssa_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
545 create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
546 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);
547
548 nir_validate_shader(b->shader, NULL);
549 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
550
551 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
552
553 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
554
555 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
556 ASSERT_EQ(load->dest.ssa.bit_size, 32);
557 ASSERT_EQ(load->dest.ssa.num_components, 2);
558 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
559 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
560 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
561 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
562
563 /* nir_opt_algebraic turns the multiply by -4 into ineg(ishl(x, 2)) */
564 ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
565 nir_ssa_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
566 ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
567 nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
568 ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
569 ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
570 }
571
572 TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
573 {
574 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
575 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
576 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
577
578 nir_validate_shader(b->shader, NULL);
579 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
580
581 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
582
583 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
584
585 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
586 ASSERT_EQ(load->dest.ssa.bit_size, 32);
587 ASSERT_EQ(load->dest.ssa.num_components, 1);
588 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
589 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
590 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
591 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
592 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
593 }
594
595 TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
596 {
597 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
598 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
599 create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);
600
601 nir_validate_shader(b->shader, NULL);
602 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
603
604 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
605
606 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
607 }
608
609 TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
610 {
611 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
612 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
613 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
614
615 nir_validate_shader(b->shader, NULL);
616 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
617
618 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
619
620 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
621 }
622
623 TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
624 {
625 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
626 create_load(nir_var_mem_ssbo, 0, 0, 0x2);
627 create_store(nir_var_mem_ssbo, 0, 0, 0x3);
628
629 nir_validate_shader(b->shader, NULL);
630 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
631
632 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
633
634 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
635 }
636
637 /* If nir_opt_load_store_vectorize were implemented like many other load/store
638 * optimization passes (for example, nir_opt_combine_stores and
639 * nir_opt_copy_prop_vars) and stopped tracking a load once an aliasing store is
640 * encountered, this case wouldn't be optimized.
641 * A similar test for derefs is shared_load_adjacent_store_identical. */
642 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
643 {
644 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
645 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
646 create_load(nir_var_mem_ssbo, 0, 4, 0x3);
647
648 nir_validate_shader(b->shader, NULL);
649 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
650 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
651
652 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
653
654 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
655 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
656
657 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
658 ASSERT_EQ(load->dest.ssa.bit_size, 32);
659 ASSERT_EQ(load->dest.ssa.num_components, 2);
660 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
661 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
662 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
663 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
664 ASSERT_EQ(loads[0x3]->swizzle[0], 1);
665 }
666
667 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
668 {
669 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
670 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
671
672 nir_validate_shader(b->shader, NULL);
673 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
674
675 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
676
677 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
678
679 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
680 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
681 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
682 nir_ssa_def *val = store->src[0].ssa;
683 ASSERT_EQ(val->bit_size, 32);
684 ASSERT_EQ(val->num_components, 2);
685 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
686 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
687 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
688 }
689
690 TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
691 {
692 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
693 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);
694
695 nir_validate_shader(b->shader, NULL);
696 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
697
698 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
699
700 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
701
702 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
703 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
704 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
705 nir_ssa_def *val = store->src[0].ssa;
706 ASSERT_EQ(val->bit_size, 32);
707 ASSERT_EQ(val->num_components, 3);
708 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
709 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
710 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
711 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
712 }
713
714 TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
715 {
716 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
717 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
718
719 nir_validate_shader(b->shader, NULL);
720 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
721
722 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
723
724 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
725
726 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
727 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
728 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
729 nir_ssa_def *val = store->src[0].ssa;
730 ASSERT_EQ(val->bit_size, 32);
731 ASSERT_EQ(val->num_components, 1);
732 ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
733 }
734
735 TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
736 {
737 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
738 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);
739
740 nir_validate_shader(b->shader, NULL);
741 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
742
743 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
744
745 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
746 }
747
748 TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
749 {
750 create_load(nir_var_mem_ubo, 0, 0, 0x1);
751 nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier)->instr);
752 create_load(nir_var_mem_ubo, 0, 4, 0x2);
753
754 nir_validate_shader(b->shader, NULL);
755 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
756
757 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
758
759 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
760 }
761
762 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
763 {
764 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
765 nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier)->instr);
766 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
767
768 nir_validate_shader(b->shader, NULL);
769 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
770
771 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
772
773 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
774 }
775
776 /* nir_intrinsic_control_barrier only syncs invocations in a workgroup; it
777 * doesn't require that outstanding loads/stores complete.
778 */
779 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
780 {
781 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
782 nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_control_barrier)->instr);
783 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
784
785 nir_validate_shader(b->shader, NULL);
786 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
787
788 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
789
790 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
791 }
792
793 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
794 {
795 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
796 nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier_shared)->instr);
797 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
798
799 nir_validate_shader(b->shader, NULL);
800 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
801
802 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
803
804 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
805 }
806
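/* The two 8-bit loads and the 16-bit load at offsets 0, 1 and 2 should become
 * a single 8-bit vec4 load; the 16-bit value is then expected to be rebuilt
 * from components 2 and 3 via u2u16, ishl and ior, which is what the checks
 * below verify. */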
807 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
808 {
809 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
810 create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
811 create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);
812
813 nir_validate_shader(b->shader, NULL);
814 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
815
816 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
817
818 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
819
820 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
821 ASSERT_EQ(load->dest.ssa.bit_size, 8);
822 ASSERT_EQ(load->dest.ssa.num_components, 4);
823 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
824 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
825 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
826 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
827 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
828
829 nir_ssa_def *val = loads[0x3]->src.ssa;
830 ASSERT_EQ(val->bit_size, 16);
831 ASSERT_EQ(val->num_components, 1);
832 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
833 nir_ssa_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
834 nir_ssa_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
835 ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
836 high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
837 ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
838 ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
839 ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->dest.ssa, 2));
840 ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->dest.ssa, 3));
841 }
842
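/* The 32-bit vec2 load at offset 0 and the 64-bit load at offset 8 should
 * merge into one 32-bit vec4 load, with the 64-bit value reassembled from
 * components 2 and 3 by pack_64_2x32. */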
843 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
844 {
845 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
846 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
847
848 nir_validate_shader(b->shader, NULL);
849 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
850
851 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
852
853 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
854
855 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
856 ASSERT_EQ(load->dest.ssa.bit_size, 32);
857 ASSERT_EQ(load->dest.ssa.num_components, 4);
858 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
859 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
860 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
861 ASSERT_EQ(loads[0x1]->swizzle[1], 1);
862
863 nir_ssa_def *val = loads[0x2]->src.ssa;
864 ASSERT_EQ(val->bit_size, 64);
865 ASSERT_EQ(val->num_components, 1);
866 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
867 nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
868 ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa);
869 ASSERT_EQ(pack->src[0].swizzle[0], 2);
870 ASSERT_EQ(pack->src[0].swizzle[1], 3);
871 }
872
873 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
874 {
875 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
876 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
877 create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);
878
879 nir_validate_shader(b->shader, NULL);
880 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
881
882 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));
883
884 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
885
886 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
887 ASSERT_EQ(load->dest.ssa.bit_size, 64);
888 ASSERT_EQ(load->dest.ssa.num_components, 3);
889 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
890 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
891 ASSERT_EQ(loads[0x3]->swizzle[0], 2);
892
893 /* pack_64_2x32(unpack_64_2x32()) is created because the 32-bit and first
894 * 64-bit loads are combined before the second 64-bit load is even considered. */
895 nir_ssa_def *val = loads[0x2]->src.ssa;
896 ASSERT_EQ(val->bit_size, 64);
897 ASSERT_EQ(val->num_components, 1);
898 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
899 nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
900 ASSERT_TRUE(test_alu(pack->src[0].src.ssa->parent_instr, nir_op_unpack_64_2x32));
901 nir_alu_instr *unpack = nir_instr_as_alu(pack->src[0].src.ssa->parent_instr);
902 ASSERT_EQ(unpack->src[0].src.ssa, &load->dest.ssa);
903 ASSERT_EQ(unpack->src[0].swizzle[0], 1);
904
905 val = loads[0x1]->src.ssa;
906 ASSERT_EQ(val->bit_size, 32);
907 ASSERT_EQ(val->num_components, 2);
908 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
909 unpack = nir_instr_as_alu(val->parent_instr);
910 ASSERT_EQ(unpack->src[0].src.ssa, &load->dest.ssa);
911 ASSERT_EQ(unpack->src[0].swizzle[0], 0);
912 }
913
914 TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
915 {
916 create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
917 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
918
919 nir_validate_shader(b->shader, NULL);
920 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
921
922 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
923
924 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
925
926 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
927 ASSERT_EQ(load->dest.ssa.bit_size, 32);
928 ASSERT_EQ(load->dest.ssa.num_components, 3);
929 ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
930 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
931 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
932 ASSERT_EQ(loads[0x1]->swizzle[1], 1);
933
934 nir_ssa_def *val = loads[0x2]->src.ssa;
935 ASSERT_EQ(val->bit_size, 64);
936 ASSERT_EQ(val->num_components, 1);
937 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
938 nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
939 ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa);
940 ASSERT_EQ(pack->src[0].swizzle[0], 1);
941 ASSERT_EQ(pack->src[0].swizzle[1], 2);
942 }
943
944 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
945 {
946 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
947 create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
948 create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);
949
950 nir_validate_shader(b->shader, NULL);
951 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
952
953 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
954
955 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
956
957 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
958 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
959 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
960 nir_ssa_def *val = store->src[0].ssa;
961 ASSERT_EQ(val->bit_size, 8);
962 ASSERT_EQ(val->num_components, 4);
963 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
964 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
965 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
966 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x30);
967 ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
968 }
969
970 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
971 {
972 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
973 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
974
975 nir_validate_shader(b->shader, NULL);
976 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
977
978 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
979
980 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
981
982 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
983 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
984 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
985 nir_ssa_def *val = store->src[0].ssa;
986 ASSERT_EQ(val->bit_size, 32);
987 ASSERT_EQ(val->num_components, 4);
988 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
989 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
990 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
991 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
992 ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
993 }
994
995 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
996 {
997 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
998 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
999 create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);
1000
1001 nir_validate_shader(b->shader, NULL);
1002 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
1003
1004 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1005
1006 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1007
1008 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1009 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1010 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1011 nir_ssa_def *val = store->src[0].ssa;
1012 ASSERT_EQ(val->bit_size, 64);
1013 ASSERT_EQ(val->num_components, 3);
1014 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1015 ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
1016 ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
1017 ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
1018 }
1019
1020 TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
1021 {
1022 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1023 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);
1024
1025 nir_validate_shader(b->shader, NULL);
1026 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1027
1028 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1029
1030 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1031
1032 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1033 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1034 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1035 nir_ssa_def *val = store->src[0].ssa;
1036 ASSERT_EQ(val->bit_size, 32);
1037 ASSERT_EQ(val->num_components, 3);
1038 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1039 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1040 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1041 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
1042 }
1043
1044 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
1045 {
1046 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
1047 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);
1048
1049 nir_validate_shader(b->shader, NULL);
1050 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1051
1052 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1053
1054 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1055 }
1056
1057 TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
1058 {
1059 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
1060 create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);
1061
1062 nir_validate_shader(b->shader, NULL);
1063 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1064
1065 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1066
1067 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1068
1069 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1070 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1071 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1072 nir_ssa_def *val = store->src[0].ssa;
1073 ASSERT_EQ(val->bit_size, 32);
1074 ASSERT_EQ(val->num_components, 4);
1075 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1076 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1077 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
1078 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
1079 ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
1080 }
1081
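/* The shared-memory tests work on derefs: run_vectorizer first lowers the
 * variables to explicit types, and after vectorization the combined load is
 * expected to go through a cast of the deref of element 0. */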
1082 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
1083 {
1084 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1085 nir_deref_instr *deref = nir_build_deref_var(b, var);
1086
1087 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1088 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);
1089
1090 nir_validate_shader(b->shader, NULL);
1091 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1092
1093 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1094
1095 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1096
1097 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1098 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1099 ASSERT_EQ(load->dest.ssa.num_components, 2);
1100
1101 deref = nir_src_as_deref(load->src[0]);
1102 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1103
1104 deref = nir_deref_instr_parent(deref);
1105 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1106 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1107
1108 deref = nir_deref_instr_parent(deref);
1109 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1110 ASSERT_EQ(deref->var, var);
1111
1112 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1113 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1114 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1115 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1116 }
1117
1118 TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
1119 {
1120 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1121 nir_deref_instr *deref = nir_build_deref_var(b, var);
1122 nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 64, NULL);
1123
1124 create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
1125 create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);
1126
1127 nir_validate_shader(b->shader, NULL);
1128 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1129
1130 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1131
1132 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1133 }
1134
1135 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
1136 {
1137 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1138 nir_deref_instr *deref = nir_build_deref_var(b, var);
1139 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1140
1141 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
1142 create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);
1143
1144 nir_validate_shader(b->shader, NULL);
1145 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1146
1147 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1148
1149 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1150
1151 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1152 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1153 ASSERT_EQ(load->dest.ssa.num_components, 2);
1154
1155 deref = nir_src_as_deref(load->src[0]);
1156 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1157
1158 deref = nir_deref_instr_parent(deref);
1159 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1160 ASSERT_EQ(deref->arr.index.ssa, index_base);
1161
1162 deref = nir_deref_instr_parent(deref);
1163 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1164 ASSERT_EQ(deref->var, var);
1165
1166 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1167 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1168 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1169 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1170 }
1171
1172 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
1173 {
1174 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1175 nir_deref_instr *deref = nir_build_deref_var(b, var);
1176 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1177 nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);
1178
1179 create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
1180 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);
1181
1182 nir_validate_shader(b->shader, NULL);
1183 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1184
1185 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1186
1187 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1188
1189 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1190 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1191 ASSERT_EQ(load->dest.ssa.num_components, 2);
1192
1193 deref = nir_src_as_deref(load->src[0]);
1194 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1195
1196 deref = nir_deref_instr_parent(deref);
1197 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1198 ASSERT_EQ(deref->arr.index.ssa, index_base_prev);
1199
1200 deref = nir_deref_instr_parent(deref);
1201 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1202 ASSERT_EQ(deref->var, var);
1203
1204 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1205 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1206 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1207 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1208 }
1209
1210 TEST_F(nir_load_store_vectorize_test, shared_load_struct)
1211 {
1212 glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1213 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1214
1215 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1216 nir_deref_instr *deref = nir_build_deref_var(b, var);
1217
1218 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
1219 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1220
1221 nir_validate_shader(b->shader, NULL);
1222 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1223
1224 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1225
1226 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1227
1228 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1229 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1230 ASSERT_EQ(load->dest.ssa.num_components, 2);
1231
1232 deref = nir_src_as_deref(load->src[0]);
1233 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1234
1235 deref = nir_deref_instr_parent(deref);
1236 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1237 ASSERT_EQ(deref->strct.index, 0);
1238
1239 deref = nir_deref_instr_parent(deref);
1240 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1241 ASSERT_EQ(deref->var, var);
1242
1243 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1244 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1245 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1246 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1247 }
1248
1249 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
1250 {
1251 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1252 nir_deref_instr *deref = nir_build_deref_var(b, var);
1253
1254 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1255 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1256 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1257
1258 nir_validate_shader(b->shader, NULL);
1259 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1260 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1261
1262 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1263
1264 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1265 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1266
1267 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1268 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1269 ASSERT_EQ(load->dest.ssa.num_components, 1);
1270
1271 deref = nir_src_as_deref(load->src[0]);
1272 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1273 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1274
1275 deref = nir_deref_instr_parent(deref);
1276 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1277 ASSERT_EQ(deref->var, var);
1278
1279 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1280 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1281 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1282 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1283 }
1284
1285 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
1286 {
1287 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1288 nir_deref_instr *deref = nir_build_deref_var(b, var);
1289
1290 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1291 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1292 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1293
1294 nir_validate_shader(b->shader, NULL);
1295 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1296
1297 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1298
1299 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1300 }
1301
1302 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
1303 {
1304 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1305 nir_deref_instr *deref = nir_build_deref_var(b, var);
1306
1307 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1308 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1309 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);
1310
1311 nir_validate_shader(b->shader, NULL);
1312 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1313 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1314
1315 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1316
1317 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1318 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1319
1320 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1321 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1322 ASSERT_EQ(load->dest.ssa.num_components, 2);
1323
1324 deref = nir_src_as_deref(load->src[0]);
1325 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1326
1327 deref = nir_deref_instr_parent(deref);
1328 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1329 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1330
1331 deref = nir_deref_instr_parent(deref);
1332 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1333 ASSERT_EQ(deref->var, var);
1334
1335 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1336 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1337 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1338 ASSERT_EQ(loads[0x3]->swizzle[0], 1);
1339 }
1340
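/* Booleans are lowered to 32-bit in shared memory, so the vectorized load is
 * 32-bit and each original boolean is recovered with an i2b1, as the checks
 * below expect. */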
1341 TEST_F(nir_load_store_vectorize_test, shared_load_bool)
1342 {
1343 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1344 nir_deref_instr *deref = nir_build_deref_var(b, var);
1345
1346 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
1347 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);
1348
1349 nir_validate_shader(b->shader, NULL);
1350 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1351
1352 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1353
1354 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1355
1356 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1357 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1358 ASSERT_EQ(load->dest.ssa.num_components, 2);
1359
1360 deref = nir_src_as_deref(load->src[0]);
1361 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1362
1363 deref = nir_deref_instr_parent(deref);
1364 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1365 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1366
1367 deref = nir_deref_instr_parent(deref);
1368 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1369 ASSERT_EQ(deref->var, var);
1370
1371 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1372 ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_i2b1));
1373 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1374 ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->dest.ssa, 1));
1375 }
1376
1377 TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
1378 {
1379 glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1380 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1381
1382 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1383 nir_deref_instr *deref = nir_build_deref_var(b, var);
1384
1385 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
1386 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1387
1388 nir_validate_shader(b->shader, NULL);
1389 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1390
1391 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1392
1393 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1394
1395 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1396 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1397 ASSERT_EQ(load->dest.ssa.num_components, 2);
1398
1399 deref = nir_src_as_deref(load->src[0]);
1400 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1401
1402 deref = nir_deref_instr_parent(deref);
1403 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1404 ASSERT_EQ(deref->strct.index, 0);
1405
1406 deref = nir_deref_instr_parent(deref);
1407 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1408 ASSERT_EQ(deref->var, var);
1409
1410 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1411 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1412 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1413 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1414 }
1415
1416 TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
1417 {
1418 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1419 nir_deref_instr *deref = nir_build_deref_var(b, var);
1420
1421 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
1422 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1423
1424 nir_validate_shader(b->shader, NULL);
1425 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
1426
1427 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1428
1429 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1430
1431 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
1432 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
1433 nir_ssa_def *val = store->src[1].ssa;
1434 ASSERT_EQ(val->bit_size, 32);
1435 ASSERT_EQ(val->num_components, 2);
1436 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1437 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1438 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1439
1440 deref = nir_src_as_deref(store->src[0]);
1441 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1442
1443 deref = nir_deref_instr_parent(deref);
1444 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1445 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1446
1447 deref = nir_deref_instr_parent(deref);
1448 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1449 ASSERT_EQ(deref->var, var);
1450 }
1451
1452 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
1453 {
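/* the second load carries base 4 on top of offset 4, so it reads byte 8 and the
 * two loads aren't adjacent; nothing should be combined */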
1454 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1455 nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);
1456
1457 nir_validate_shader(b->shader, NULL);
1458 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1459
1460 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1461
1462 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1463 }
1464
1465 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
1466 {
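/* loads at bytes 0 and 8 leave a 4-byte hole between them, so they must stay
 * separate */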
1467 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1468 create_load(nir_var_mem_push_const, 0, 8, 0x2);
1469
1470 nir_validate_shader(b->shader, NULL);
1471 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1472
1473 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1474
1475 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1476 }
1477
1478 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
1479 {
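/* one direct and one indirect offset with no provable relationship: adjacency
 * can't be shown, so the loads must stay separate */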
1480 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1481 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1482 create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);
1483
1484 nir_validate_shader(b->shader, NULL);
1485 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1486
1487 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1488
1489 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1490 }
1491
1492 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
1493 {
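/* the offsets are (index+2)*16+32 and (index+3)*16+32, i.e. 16 bytes apart with a
 * 12-byte hole, so the loads must stay separate */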
1494 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1495 create_indirect_load(nir_var_mem_push_const, 0,
1496 nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 2)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x1);
1497 create_indirect_load(nir_var_mem_push_const, 0,
1498 nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 3)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x2);
1499
1500 nir_validate_shader(b->shader, NULL);
1501 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1502
1503 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1504
1505 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1506 }
1507
1508 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
1509 {
1510 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1511 // vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
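// low = index*16 + 12 and high = (index+1)*16 = index*16 + 16 are 4 bytes apart,
// so the two loads are expected to merge into a single vec2 load at "low".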
1512 nir_ssa_def *low = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 12));
1513 nir_ssa_def *high = nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 1)), nir_imm_int(b, 16));
1514 create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
1515 create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);
1516
1517 nir_validate_shader(b->shader, NULL);
1518 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1519
1520 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1521
1522 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
1523
1524 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
1525 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1526 ASSERT_EQ(load->dest.ssa.num_components, 2);
1527 ASSERT_EQ(load->src[0].ssa, low);
1528 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1529 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1530 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1531 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1532 }
1533
1534 TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
1535 {
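/* the store's offset is fully indirect and may be 0, so it may alias the loads at
 * offset 0; the loads around it must not be combined */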
1536 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1537 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1538 create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
1539 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1540
1541 nir_validate_shader(b->shader, NULL);
1542 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1543
1544 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1545
1546 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1547 }
1548
1549 TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
1550 {
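/* the store's indirect offset may equal the loads' indirect offset, so the
 * identical loads around it must not be combined */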
1551 nir_ssa_def *load_base = nir_load_global_invocation_index(b, 32);
1552 nir_ssa_def *store_base = nir_load_local_invocation_index(b);
1553 create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
1554 create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
1555 create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);
1556
1557 nir_validate_shader(b->shader, NULL);
1558 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1559
1560 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1561
1562 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1563 }
1564
1565 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
1566 {
1567 /* TODO: try to combine these loads */
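/* the load offset index*16+4 is always congruent to 4 mod 16, so it can never hit
 * the store at offset 0; the pass just can't prove that yet */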
1568 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1569 nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 4));
1570 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1571 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1572 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1573
1574 nir_validate_shader(b->shader, NULL);
1575 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1576
1577 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1578
1579 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1580
1581 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1582 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1583 ASSERT_EQ(load->dest.ssa.num_components, 1);
1584 ASSERT_EQ(load->src[1].ssa, offset);
1585 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1586 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1587 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1588 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1589 }
1590
1591 TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
1592 {
1593 /* these loads can't be combined because if index_base == 268435455 (0x0fffffff),
1594  * the offset computation wraps around to 0 and may alias the store at offset 0.
1595  * With nir_alu_instr::no_unsigned_wrap set they could be combined (see DISABLED_ssbo_alias4). */
1596 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1597 nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
1598 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1599 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1600 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1601
1602 nir_validate_shader(b->shader, NULL);
1603 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1604
1605 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1606
1607 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1608 }
1609
1610 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
1611 {
1612 /* TODO: try to combine these loads */
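/* with no_unsigned_wrap on the iadd, offset = index*16 + 16 can't wrap around to
 * 0, so the store at offset 0 can't alias and the loads should be combined */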
1613 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1614 nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
1615 nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
1616 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1617 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1618 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1619
1620 nir_validate_shader(b->shader, NULL);
1621 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1622
1623 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1624
1625 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1626
1627 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1628 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1629 ASSERT_EQ(load->dest.ssa.num_components, 1);
1630 ASSERT_EQ(load->src[1].ssa, offset);
1631 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1632 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1633 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1634 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1635 }
1636
1637 TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
1638 {
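/* without ACCESS_RESTRICT, binding 1 may alias binding 0, so the store in between
 * keeps the identical loads from being combined */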
1639 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1640 create_store(nir_var_mem_ssbo, 1, 0, 0x2);
1641 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1642
1643 nir_validate_shader(b->shader, NULL);
1644 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1645
1646 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1647
1648 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1649 }
1650
1651 TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
1652 {
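/* with ACCESS_RESTRICT, the store to binding 1 can't alias binding 0, so the two
 * identical loads are expected to collapse into one */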
1653 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
1654 create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
1655 create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);
1656
1657 nir_validate_shader(b->shader, NULL);
1658 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1659
1660 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1661
1662 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1663
1664 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1665 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1666 ASSERT_EQ(load->dest.ssa.num_components, 1);
1667 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1668 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1669 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1670 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1671 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1672 }
1673
1674 TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
1675 {
1676 /* TODO: implement type-based alias analysis so that these loads can be
1677 * combined. this is made a bit more difficult than simply using
1678 * nir_compare_derefs() because the vectorizer creates loads/stores with
1679 * casted derefs. The solution would probably be to keep multiple derefs for
1680 * an entry (one for each load/store combined into it). */
1681 glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1682 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1683
1684 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1685 nir_deref_instr *deref = nir_build_deref_var(b, var);
1686
1687 nir_ssa_def *index0 = nir_load_local_invocation_index(b);
1688 nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32);
1689 nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);
1690
1691 create_shared_load(load_deref, 0x1);
1692 create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
1693 create_shared_load(load_deref, 0x3);
1694
1695 nir_validate_shader(b->shader, NULL);
1696 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1697
1698 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1699
1700 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1701
1702 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1703 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1704 ASSERT_EQ(load->dest.ssa.num_components, 1);
1705 ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1706 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1707 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1708 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1709 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1710 }
1711
1712 TEST_F(nir_load_store_vectorize_test, shared_alias1)
1713 {
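/* a store to a different shared variable can't alias var0, so the two identical
 * loads are expected to collapse into one */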
1714 nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
1715 nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
1716 nir_deref_instr *load_deref = nir_build_deref_var(b, var0);
1717
1718 create_shared_load(load_deref, 0x1);
1719 create_shared_store(nir_build_deref_var(b, var1), 0x2);
1720 create_shared_load(load_deref, 0x3);
1721
1722 nir_validate_shader(b->shader, NULL);
1723 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1724
1725 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1726
1727 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1728
1729 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1730 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1731 ASSERT_EQ(load->dest.ssa.num_components, 1);
1732 ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1733 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1734 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1735 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1736 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1737 }
1738
1739 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
1740 {
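/* the 64-bit offsets are more than 4GiB apart; their low 32 bits (0 and 4) would
 * look adjacent, so this checks that the loads are not combined */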
1741 create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x100000000, 64), 0x1);
1742 create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x200000004, 64), 0x2);
1743
1744 nir_validate_shader(b->shader, NULL);
1745 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1746
1747 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1748
1749 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1750 }
1751
1752 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
1753 {
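/* both offsets are multiples of 2^32, so their low 32 bits are always equal; the
 * distance isn't a compile-time constant and the loads must stay separate */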
1754 nir_ssa_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
1755 nir_ssa_def *first = nir_imul_imm(b, index_base, 0x100000000);
1756 nir_ssa_def *second = nir_imul_imm(b, index_base, 0x200000000);
1757 create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
1758 create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);
1759
1760 nir_validate_shader(b->shader, NULL);
1761 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1762
1763 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1764
1765 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1766 }