src/compiler/nir/tests/load_store_vectorizer_tests.cpp
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include <gtest/gtest.h>
25
26 #include "nir.h"
27 #include "nir_builder.h"
28
29 namespace {
30
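/* Test fixture for nir_opt_load_store_vectorize: every test builds a small
 * compute shader with nir_builder, runs the vectorizer and inspects the
 * resulting intrinsics.  The create_* helpers tag each load with an id and
 * record the ALU source that reads the load result in `loads`, so the tests
 * can later check which vectorized value (and which swizzle) each original
 * load ended up using. */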
31 class nir_load_store_vectorize_test : public ::testing::Test {
32 protected:
33 nir_load_store_vectorize_test();
34 ~nir_load_store_vectorize_test();
35
36 unsigned count_intrinsics(nir_intrinsic_op intrinsic);
37
38 nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
39 unsigned index);
40
41 bool run_vectorizer(nir_variable_mode modes, bool cse=false);
42
43 nir_ssa_def *get_resource(uint32_t binding, bool ssbo);
44
45 nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
46 uint32_t id, unsigned bit_size=32, unsigned components=1,
47 unsigned access=0);
48 void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
49 uint32_t id, unsigned bit_size=32, unsigned components=1,
50 unsigned wrmask=0xf, unsigned access=0);
51
52 nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
53 uint32_t id, unsigned bit_size=32, unsigned components=1,
54 unsigned access=0);
55 void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
56 uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
57 unsigned access=0);
58
59 void create_shared_load(nir_deref_instr *deref, uint32_t id,
60 unsigned bit_size=32, unsigned components=1);
61 void create_shared_store(nir_deref_instr *deref, uint32_t id,
62 unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);
63
64 bool test_alu(nir_instr *instr, nir_op op);
65 bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);
66
67 static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
68 unsigned num_components, unsigned high_offset,
69 nir_intrinsic_instr *low, nir_intrinsic_instr *high);
70 static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);
71
72 void *mem_ctx;
73
74 nir_builder *b;
75 std::map<unsigned, nir_alu_src*> loads;
76 std::map<unsigned, nir_ssa_def*> res_map;
77 };
78
79 nir_load_store_vectorize_test::nir_load_store_vectorize_test()
80 {
81 glsl_type_singleton_init_or_ref();
82
83 mem_ctx = ralloc_context(NULL);
84 static const nir_shader_compiler_options options = { };
85 b = rzalloc(mem_ctx, nir_builder);
86 nir_builder_init_simple_shader(b, mem_ctx, MESA_SHADER_COMPUTE, &options);
87 }
88
89 nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
90 {
91 if (HasFailure()) {
92 printf("\nShader from the failed test:\n\n");
93 nir_print_shader(b->shader, stdout);
94 }
95
96 ralloc_free(mem_ctx);
97
98 glsl_type_singleton_decref();
99 }
100
101 unsigned
102 nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
103 {
104 unsigned count = 0;
105 nir_foreach_block(block, b->impl) {
106 nir_foreach_instr(instr, block) {
107 if (instr->type != nir_instr_type_intrinsic)
108 continue;
109 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
110 if (intrin->intrinsic == intrinsic)
111 count++;
112 }
113 }
114 return count;
115 }
116
117 nir_intrinsic_instr *
118 nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
119 unsigned index)
120 {
121 nir_foreach_block(block, b->impl) {
122 nir_foreach_instr(instr, block) {
123 if (instr->type != nir_instr_type_intrinsic)
124 continue;
125 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
126 if (intrin->intrinsic == intrinsic) {
127 if (index == 0)
128 return intrin;
129 index--;
130 }
131 }
132 }
133 return NULL;
134 }
135
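/* Runs nir_opt_load_store_vectorize on the built shader.  Shared-memory tests
 * first need explicit type layouts, and when the vectorizer makes progress the
 * shader is validated and a few cleanup passes (optionally CSE) are run so the
 * tests can match the final, folded pattern. */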
136 bool
137 nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes, bool cse)
138 {
139 if (modes & nir_var_mem_shared)
140 nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
141 bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback);
142 if (progress) {
143 nir_validate_shader(b->shader, NULL);
144 if (cse)
145 nir_opt_cse(b->shader);
146 nir_copy_prop(b->shader);
147 nir_opt_algebraic(b->shader);
148 nir_opt_constant_folding(b->shader);
149 }
150 return progress;
151 }
152
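/* Returns (and caches, per binding) a vulkan_resource_index SSA value used as
 * the UBO/SSBO index source of the load/store intrinsics. */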
153 nir_ssa_def *
154 nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
155 {
156 if (res_map.count(binding))
157 return res_map[binding];
158
159 nir_intrinsic_instr *res = nir_intrinsic_instr_create(
160 b->shader, nir_intrinsic_vulkan_resource_index);
161 nir_ssa_dest_init(&res->instr, &res->dest, 1, 32, NULL);
162 res->num_components = 1;
163 res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
164 nir_intrinsic_set_desc_type(
165 res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
166 nir_intrinsic_set_desc_set(res, 0);
167 nir_intrinsic_set_binding(res, binding);
168 nir_builder_instr_insert(b, &res->instr);
169 res_map[binding] = &res->dest.ssa;
170 return &res->dest.ssa;
171 }
172
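/* Emits a UBO/SSBO/push-constant load with an arbitrary SSA offset, plus a
 * trailing mov whose source is stored in loads[id] for later inspection. */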
173 nir_intrinsic_instr *
174 nir_load_store_vectorize_test::create_indirect_load(
175 nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
176 unsigned bit_size, unsigned components, unsigned access)
177 {
178 nir_intrinsic_op intrinsic;
179 nir_ssa_def *res = NULL;
180 switch (mode) {
181 case nir_var_mem_ubo:
182 intrinsic = nir_intrinsic_load_ubo;
183 res = get_resource(binding, false);
184 break;
185 case nir_var_mem_ssbo:
186 intrinsic = nir_intrinsic_load_ssbo;
187 res = get_resource(binding, true);
188 break;
189 case nir_var_mem_push_const:
190 intrinsic = nir_intrinsic_load_push_constant;
191 break;
192 default:
193 return NULL;
194 }
195 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
196 nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
197 load->num_components = components;
198 if (res) {
199 load->src[0] = nir_src_for_ssa(res);
200 load->src[1] = nir_src_for_ssa(offset);
201 } else {
202 load->src[0] = nir_src_for_ssa(offset);
203 }
204 if (mode != nir_var_mem_push_const) {
205 nir_intrinsic_set_align(load, (bit_size == 1 ? 32 : bit_size) / 8, 0);
206 nir_intrinsic_set_access(load, (gl_access_qualifier)access);
207 }
208 nir_builder_instr_insert(b, &load->instr);
209 nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr;
210 loads[id] = &nir_instr_as_alu(mov)->src[0];
211
212 return load;
213 }
214
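/* Emits an SSBO/shared store of an immediate vector whose component i is
 * (id << 4) | i, which lets the tests recognize which store produced which
 * constant after vectorization. */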
215 void
216 nir_load_store_vectorize_test::create_indirect_store(
217 nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
218 unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
219 {
220 nir_const_value values[NIR_MAX_VEC_COMPONENTS];
221 for (unsigned i = 0; i < components; i++)
222 values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
223 nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);
224
225 nir_intrinsic_op intrinsic;
226 nir_ssa_def *res = NULL;
227 switch (mode) {
228 case nir_var_mem_ssbo:
229 intrinsic = nir_intrinsic_store_ssbo;
230 res = get_resource(binding, true);
231 break;
232 case nir_var_mem_shared:
233 intrinsic = nir_intrinsic_store_shared;
234 break;
235 default:
236 return;
237 }
238 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
239 nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
240 store->num_components = components;
241 if (res) {
242 store->src[0] = nir_src_for_ssa(value);
243 store->src[1] = nir_src_for_ssa(res);
244 store->src[2] = nir_src_for_ssa(offset);
245 } else {
246 store->src[0] = nir_src_for_ssa(value);
247 store->src[1] = nir_src_for_ssa(offset);
248 }
249 nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
250 nir_intrinsic_set_access(store, (gl_access_qualifier)access);
251 nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
252 nir_builder_instr_insert(b, &store->instr);
253 }
254
255 nir_intrinsic_instr *
256 nir_load_store_vectorize_test::create_load(
257 nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
258 unsigned bit_size, unsigned components, unsigned access)
259 {
260 return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
261 }
262
263 void
264 nir_load_store_vectorize_test::create_store(
265 nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
266 unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
267 {
268 create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
269 }
270
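/* Deref-based variants of the helpers above, used for the shared-memory
 * tests. */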
271 void nir_load_store_vectorize_test::create_shared_load(
272 nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
273 {
274 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
275 nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
276 load->num_components = components;
277 load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
278 nir_builder_instr_insert(b, &load->instr);
279 nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr;
280 loads[id] = &nir_instr_as_alu(mov)->src[0];
281 }
282
283 void nir_load_store_vectorize_test::create_shared_store(
284 nir_deref_instr *deref, uint32_t id,
285 unsigned bit_size, unsigned components, unsigned wrmask)
286 {
287 nir_const_value values[NIR_MAX_VEC_COMPONENTS];
288 for (unsigned i = 0; i < components; i++)
289 values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
290 nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);
291
292 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_deref);
293 nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
294 store->num_components = components;
295 store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
296 store->src[1] = nir_src_for_ssa(value);
297 nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
298 nir_builder_instr_insert(b, &store->instr);
299 }
300
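/* Small helpers to check that an instruction is a given ALU op and that one of
 * its sources reads a particular SSA def with a particular swizzle. */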
301 bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
302 {
303 return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
304 }
305
306 bool nir_load_store_vectorize_test::test_alu_def(
307 nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
308 {
309 if (instr->type != nir_instr_type_alu)
310 return false;
311
312 nir_alu_instr *alu = nir_instr_as_alu(instr);
313
314 if (index >= nir_op_infos[alu->op].num_inputs)
315 return false;
316 if (alu->src[index].src.ssa != def)
317 return false;
318 if (alu->src[index].swizzle[0] != swizzle)
319 return false;
320
321 return true;
322 }
323
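/* Callback deciding whether two adjacent accesses may be combined.  Returning
 * a non-zero value allows it; this one accepts any whole-byte bit size,
 * regardless of alignment or component count. */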
324 bool nir_load_store_vectorize_test::mem_vectorize_callback(
325 unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
326 nir_intrinsic_instr *low, nir_intrinsic_instr *high)
327 {
328 return bit_size / 8;
329 }
330
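/* glsl_type size/alignment callback for nir_lower_vars_to_explicit_types:
 * scalars and vectors get their natural component size and alignment, with
 * booleans taking 4 bytes. */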
331 void nir_load_store_vectorize_test::shared_type_info(
332 const struct glsl_type *type, unsigned *size, unsigned *align)
333 {
334 assert(glsl_type_is_vector_or_scalar(type));
335
336 uint32_t comp_size = glsl_type_is_boolean(type)
337 ? 4 : glsl_get_bit_size(type) / 8;
338 unsigned length = glsl_get_vector_elements(type);
339    *size = comp_size * length;
340    *align = comp_size;
341 }
342 } // namespace
343
344 TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
345 {
346 create_load(nir_var_mem_ubo, 0, 0, 0x1);
347 create_load(nir_var_mem_ubo, 0, 4, 0x2);
348
349 nir_validate_shader(b->shader, NULL);
350 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
351
352 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
353
354 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
355
356 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
357 ASSERT_EQ(load->dest.ssa.bit_size, 32);
358 ASSERT_EQ(load->dest.ssa.num_components, 2);
359 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
360 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
361 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
362 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
363 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
364 }
365
366 TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
367 {
368 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
369 create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);
370
371 nir_validate_shader(b->shader, NULL);
372 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
373
374 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
375
376 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
377
378 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
379 ASSERT_EQ(load->dest.ssa.bit_size, 32);
380 ASSERT_EQ(load->dest.ssa.num_components, 3);
381 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
382 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
383 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
384 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
385 ASSERT_EQ(loads[0x1]->swizzle[1], 1);
386 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
387 ASSERT_EQ(loads[0x2]->swizzle[1], 2);
388 }
389
390 TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
391 {
392 create_load(nir_var_mem_ubo, 0, 0, 0x1);
393 create_load(nir_var_mem_ubo, 0, 0, 0x2);
394
395 nir_validate_shader(b->shader, NULL);
396 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
397
398 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
399
400 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
401
402 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
403 ASSERT_EQ(load->dest.ssa.bit_size, 32);
404 ASSERT_EQ(load->dest.ssa.num_components, 1);
405 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
406 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
407 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
408 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
409 ASSERT_EQ(loads[0x2]->swizzle[0], 0);
410 }
411
412 TEST_F(nir_load_store_vectorize_test, ubo_load_large)
413 {
414 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
415 create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);
416
417 nir_validate_shader(b->shader, NULL);
418 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
419
420 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
421
422 nir_validate_shader(b->shader, NULL);
423 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
424 }
425
426 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
427 {
428 create_load(nir_var_mem_push_const, 0, 0, 0x1);
429 create_load(nir_var_mem_push_const, 0, 4, 0x2);
430
431 nir_validate_shader(b->shader, NULL);
432 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
433
434 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
435
436 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
437
438 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
439 ASSERT_EQ(load->dest.ssa.bit_size, 32);
440 ASSERT_EQ(load->dest.ssa.num_components, 2);
441 ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
442 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
443 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
444 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
445 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
446 }
447
448 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
449 {
450 create_load(nir_var_mem_push_const, 0, 0, 0x1);
451 nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);
452
453 nir_validate_shader(b->shader, NULL);
454 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
455
456 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
457
458 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
459
460 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
461 ASSERT_EQ(load->dest.ssa.bit_size, 32);
462 ASSERT_EQ(load->dest.ssa.num_components, 2);
463 ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
464 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
465 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
466 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
467 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
468 }
469
470 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
471 {
472 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
473 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
474
475 nir_validate_shader(b->shader, NULL);
476 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
477
478 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
479
480 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
481
482 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
483 ASSERT_EQ(load->dest.ssa.bit_size, 32);
484 ASSERT_EQ(load->dest.ssa.num_components, 2);
485 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
486 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
487 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
488 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
489 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
490 }
491
492 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
493 {
494 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
495 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
496 create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);
497
498 nir_validate_shader(b->shader, NULL);
499 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
500
501 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
502
503 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
504
505 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
506 ASSERT_EQ(load->dest.ssa.bit_size, 32);
507 ASSERT_EQ(load->dest.ssa.num_components, 2);
508 ASSERT_EQ(load->src[1].ssa, index_base);
509 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
510 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
511 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
512 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
513 }
514
515 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
516 {
517 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
518 nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
519 create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
520 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);
521
522 nir_validate_shader(b->shader, NULL);
523 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
524
525 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
526
527 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
528
529 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
530 ASSERT_EQ(load->dest.ssa.bit_size, 32);
531 ASSERT_EQ(load->dest.ssa.num_components, 2);
532 ASSERT_EQ(load->src[1].ssa, index_base_prev);
533 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
534 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
535 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
536 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
537 }
538
539 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
540 {
541 nir_ssa_def *inv = nir_load_local_invocation_index(b);
542 nir_ssa_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
543 nir_ssa_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
544 nir_ssa_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
545 create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
546 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);
547
548 nir_validate_shader(b->shader, NULL);
549 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
550
551 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
552
553 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
554
555 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
556 ASSERT_EQ(load->dest.ssa.bit_size, 32);
557 ASSERT_EQ(load->dest.ssa.num_components, 2);
558 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
559 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
560 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
561 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
562
563    /* nir_opt_algebraic rewrites the imul by -4 into ineg(ishl(x, 2)) */
564 ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
565 nir_ssa_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
566 ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
567 nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
568 ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
569 ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
570 }
571
572 TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
573 {
574 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
575 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
576 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
577
578 nir_validate_shader(b->shader, NULL);
579 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
580
581 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
582
583 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
584
585 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
586 ASSERT_EQ(load->dest.ssa.bit_size, 32);
587 ASSERT_EQ(load->dest.ssa.num_components, 1);
588 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
589 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
590 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
591 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
592 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
593 }
594
595 TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
596 {
597 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
598 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
599 create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);
600
601 nir_validate_shader(b->shader, NULL);
602 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
603
604 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
605
606 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
607 }
608
609 TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
610 {
611 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
612 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
613 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
614
615 nir_validate_shader(b->shader, NULL);
616 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
617
618 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
619
620 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
621 }
622
623 TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
624 {
625 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
626 create_load(nir_var_mem_ssbo, 0, 0, 0x2);
627 create_store(nir_var_mem_ssbo, 0, 0, 0x3);
628
629 nir_validate_shader(b->shader, NULL);
630 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
631
632 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
633
634 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
635 }
636
637 /* If nir_opt_load_store_vectorize were implemented like many other
638  * load/store optimization passes (for example, nir_opt_combine_stores and
639  * nir_opt_copy_prop_vars) and stopped tracking a load once an aliasing store
640  * is encountered, this case wouldn't be optimized.
641  * A similar test for derefs is shared_load_adjacent_store_identical. */
642 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
643 {
644 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
645 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
646 create_load(nir_var_mem_ssbo, 0, 4, 0x3);
647
648 nir_validate_shader(b->shader, NULL);
649 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
650 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
651
652 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
653
654 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
655 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
656
657 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
658 ASSERT_EQ(load->dest.ssa.bit_size, 32);
659 ASSERT_EQ(load->dest.ssa.num_components, 2);
660 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
661 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
662 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
663 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
664 ASSERT_EQ(loads[0x3]->swizzle[0], 1);
665 }
666
667 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
668 {
669 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
670 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
671
672 nir_validate_shader(b->shader, NULL);
673 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
674
675 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
676
677 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
678
679 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
680 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
681 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
682 nir_ssa_def *val = store->src[0].ssa;
683 ASSERT_EQ(val->bit_size, 32);
684 ASSERT_EQ(val->num_components, 2);
685 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
686 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
687 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
688 }
689
690 TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
691 {
692 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
693 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);
694
695 nir_validate_shader(b->shader, NULL);
696 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
697
698 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
699
700 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
701
702 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
703 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
704 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
705 nir_ssa_def *val = store->src[0].ssa;
706 ASSERT_EQ(val->bit_size, 32);
707 ASSERT_EQ(val->num_components, 3);
708 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
709 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
710 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
711 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
712 }
713
714 TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
715 {
716 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
717 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
718
719 nir_validate_shader(b->shader, NULL);
720 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
721
722 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
723
724 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
725
726 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
727 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
728 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
729 nir_ssa_def *val = store->src[0].ssa;
730 ASSERT_EQ(val->bit_size, 32);
731 ASSERT_EQ(val->num_components, 1);
732 ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
733 }
734
735 TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
736 {
737 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
738 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);
739
740 nir_validate_shader(b->shader, NULL);
741 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
742
743 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
744
745 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
746 }
747
748 TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
749 {
750 create_load(nir_var_mem_ubo, 0, 0, 0x1);
751
752 nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
753 nir_var_mem_ssbo);
754
755 create_load(nir_var_mem_ubo, 0, 4, 0x2);
756
757 nir_validate_shader(b->shader, NULL);
758 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
759
760 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
761
762 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
763 }
764
765 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
766 {
767 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
768
769 nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
770 nir_var_mem_ssbo);
771
772 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
773
774 nir_validate_shader(b->shader, NULL);
775 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
776
777 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
778
779 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
780 }
781
782 /* nir_intrinsic_control_barrier only synchronizes the invocations in a
783  * workgroup; it doesn't require that their loads/stores complete.
784  */
785 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
786 {
787 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
788 nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_control_barrier)->instr);
789 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
790
791 nir_validate_shader(b->shader, NULL);
792 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
793
794 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
795
796 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
797 }
798
799 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
800 {
801 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
802
803 nir_scoped_memory_barrier(b, NIR_SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
804 nir_var_mem_shared);
805
806 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
807
808 nir_validate_shader(b->shader, NULL);
809 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
810
811 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
812
813 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
814 }
815
816 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
817 {
818 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
819 create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
820 create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);
821
822 nir_validate_shader(b->shader, NULL);
823 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
824
825 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
826
827 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
828
829 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
830 ASSERT_EQ(load->dest.ssa.bit_size, 8);
831 ASSERT_EQ(load->dest.ssa.num_components, 4);
832 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
833 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
834 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
835 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
836 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
837
838 nir_ssa_def *val = loads[0x3]->src.ssa;
839 ASSERT_EQ(val->bit_size, 16);
840 ASSERT_EQ(val->num_components, 1);
841 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
842 nir_ssa_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
843 nir_ssa_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
844 ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
845 high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
846 ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
847 ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
848 ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->dest.ssa, 2));
849 ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->dest.ssa, 3));
850 }
851
852 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
853 {
854 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
855 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
856
857 nir_validate_shader(b->shader, NULL);
858 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
859
860 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
861
862 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
863
864 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
865 ASSERT_EQ(load->dest.ssa.bit_size, 32);
866 ASSERT_EQ(load->dest.ssa.num_components, 4);
867 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
868 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
869 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
870 ASSERT_EQ(loads[0x1]->swizzle[1], 1);
871
872 nir_ssa_def *val = loads[0x2]->src.ssa;
873 ASSERT_EQ(val->bit_size, 64);
874 ASSERT_EQ(val->num_components, 1);
875 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
876 nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
877 ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa);
878 ASSERT_EQ(pack->src[0].swizzle[0], 2);
879 ASSERT_EQ(pack->src[0].swizzle[1], 3);
880 }
881
882 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
883 {
884 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
885 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
886 create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);
887
888 nir_validate_shader(b->shader, NULL);
889 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
890
891 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));
892
893 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
894
895 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
896 ASSERT_EQ(load->dest.ssa.bit_size, 64);
897 ASSERT_EQ(load->dest.ssa.num_components, 3);
898 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
899 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
900 ASSERT_EQ(loads[0x3]->swizzle[0], 2);
901
902 /* pack_64_2x32(unpack_64_2x32()) is created because the 32-bit and first
903 * 64-bit loads are combined before the second 64-bit load is even considered. */
904 nir_ssa_def *val = loads[0x2]->src.ssa;
905 ASSERT_EQ(val->bit_size, 64);
906 ASSERT_EQ(val->num_components, 1);
907 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
908 nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
909 ASSERT_TRUE(test_alu(pack->src[0].src.ssa->parent_instr, nir_op_unpack_64_2x32));
910 nir_alu_instr *unpack = nir_instr_as_alu(pack->src[0].src.ssa->parent_instr);
911 ASSERT_EQ(unpack->src[0].src.ssa, &load->dest.ssa);
912 ASSERT_EQ(unpack->src[0].swizzle[0], 1);
913
914 val = loads[0x1]->src.ssa;
915 ASSERT_EQ(val->bit_size, 32);
916 ASSERT_EQ(val->num_components, 2);
917 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
918 unpack = nir_instr_as_alu(val->parent_instr);
919 ASSERT_EQ(unpack->src[0].src.ssa, &load->dest.ssa);
920 ASSERT_EQ(unpack->src[0].swizzle[0], 0);
921 }
922
923 TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
924 {
925 create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
926 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
927
928 nir_validate_shader(b->shader, NULL);
929 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
930
931 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
932
933 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
934
935 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
936 ASSERT_EQ(load->dest.ssa.bit_size, 32);
937 ASSERT_EQ(load->dest.ssa.num_components, 3);
938 ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
939 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
940 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
941 ASSERT_EQ(loads[0x1]->swizzle[1], 1);
942
943 nir_ssa_def *val = loads[0x2]->src.ssa;
944 ASSERT_EQ(val->bit_size, 64);
945 ASSERT_EQ(val->num_components, 1);
946 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
947 nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
948 ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa);
949 ASSERT_EQ(pack->src[0].swizzle[0], 1);
950 ASSERT_EQ(pack->src[0].swizzle[1], 2);
951 }
952
953 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
954 {
955 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
956 create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
957 create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);
958
959 nir_validate_shader(b->shader, NULL);
960 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
961
962 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
963
964 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
965
966 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
967 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
968 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
969 nir_ssa_def *val = store->src[0].ssa;
970 ASSERT_EQ(val->bit_size, 8);
971 ASSERT_EQ(val->num_components, 4);
972 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
973 ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
974 ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
975 ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
976 ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
977 }
978
979 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
980 {
981 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
982 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
983
984 nir_validate_shader(b->shader, NULL);
985 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
986
987 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
988
989 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
990
991 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
992 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
993 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
994 nir_ssa_def *val = store->src[0].ssa;
995 ASSERT_EQ(val->bit_size, 32);
996 ASSERT_EQ(val->num_components, 4);
997 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
998 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
999 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
1000 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
1001 ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
1002 }
1003
1004 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
1005 {
1006 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1007 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1008 create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);
1009
1010 nir_validate_shader(b->shader, NULL);
1011 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
1012
1013 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1014
1015 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1016
1017 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1018 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1019 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1020 nir_ssa_def *val = store->src[0].ssa;
1021 ASSERT_EQ(val->bit_size, 64);
1022 ASSERT_EQ(val->num_components, 3);
1023 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1024 ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
1025 ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
1026 ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
1027 }
1028
1029 TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
1030 {
1031 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1032 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);
1033
1034 nir_validate_shader(b->shader, NULL);
1035 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1036
1037 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1038
1039 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1040
1041 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1042 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1043 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1044 nir_ssa_def *val = store->src[0].ssa;
1045 ASSERT_EQ(val->bit_size, 32);
1046 ASSERT_EQ(val->num_components, 3);
1047 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1048 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1049 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1050 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
1051 }
1052
1053 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
1054 {
1055 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
1056 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);
1057
1058 nir_validate_shader(b->shader, NULL);
1059 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1060
1061 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1062
1063 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1064 }
1065
1066 TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
1067 {
1068 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
1069 create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);
1070
1071 nir_validate_shader(b->shader, NULL);
1072 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1073
1074 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1075
1076 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1077
1078 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1079 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1080 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1081 nir_ssa_def *val = store->src[0].ssa;
1082 ASSERT_EQ(val->bit_size, 32);
1083 ASSERT_EQ(val->num_components, 4);
1084 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1085 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1086 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
1087 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
1088 ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
1089 }
1090
1091 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
1092 {
1093 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1094 nir_deref_instr *deref = nir_build_deref_var(b, var);
1095
1096 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1097 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);
1098
1099 nir_validate_shader(b->shader, NULL);
1100 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1101
1102 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1103
1104 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1105
1106 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1107 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1108 ASSERT_EQ(load->dest.ssa.num_components, 2);
1109
1110 deref = nir_src_as_deref(load->src[0]);
1111 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1112
1113 deref = nir_deref_instr_parent(deref);
1114 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1115 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1116
1117 deref = nir_deref_instr_parent(deref);
1118 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1119 ASSERT_EQ(deref->var, var);
1120
1121 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1122 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1123 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1124 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1125 }
1126
1127 TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
1128 {
1129 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1130 nir_deref_instr *deref = nir_build_deref_var(b, var);
1131 nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 64, NULL);
1132
1133 create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
1134 create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);
1135
1136 nir_validate_shader(b->shader, NULL);
1137 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1138
1139 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1140
1141 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1142 }
1143
1144 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
1145 {
1146 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1147 nir_deref_instr *deref = nir_build_deref_var(b, var);
1148 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1149
1150 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
1151 create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);
1152
1153 nir_validate_shader(b->shader, NULL);
1154 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1155
1156 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1157
1158 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1159
1160 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1161 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1162 ASSERT_EQ(load->dest.ssa.num_components, 2);
1163
1164 deref = nir_src_as_deref(load->src[0]);
1165 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1166
1167 deref = nir_deref_instr_parent(deref);
1168 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1169 ASSERT_EQ(deref->arr.index.ssa, index_base);
1170
1171 deref = nir_deref_instr_parent(deref);
1172 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1173 ASSERT_EQ(deref->var, var);
1174
1175 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1176 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1177 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1178 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1179 }
1180
1181 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
1182 {
1183 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1184 nir_deref_instr *deref = nir_build_deref_var(b, var);
1185 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1186 nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);
1187
1188 create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
1189 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);
1190
1191 nir_validate_shader(b->shader, NULL);
1192 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1193
1194 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1195
1196 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1197
1198 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1199 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1200 ASSERT_EQ(load->dest.ssa.num_components, 2);
1201
1202 deref = nir_src_as_deref(load->src[0]);
1203 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1204
1205 deref = nir_deref_instr_parent(deref);
1206 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1207 ASSERT_EQ(deref->arr.index.ssa, index_base_prev);
1208
1209 deref = nir_deref_instr_parent(deref);
1210 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1211 ASSERT_EQ(deref->var, var);
1212
1213 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1214 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1215 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1216 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1217 }
1218
1219 TEST_F(nir_load_store_vectorize_test, shared_load_struct)
1220 {
1221 glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1222 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1223
1224 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1225 nir_deref_instr *deref = nir_build_deref_var(b, var);
1226
1227 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
1228 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1229
1230 nir_validate_shader(b->shader, NULL);
1231 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1232
1233 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1234
1235 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1236
1237 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1238 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1239 ASSERT_EQ(load->dest.ssa.num_components, 2);
1240
1241 deref = nir_src_as_deref(load->src[0]);
1242 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1243
1244 deref = nir_deref_instr_parent(deref);
1245 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1246 ASSERT_EQ(deref->strct.index, 0);
1247
1248 deref = nir_deref_instr_parent(deref);
1249 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1250 ASSERT_EQ(deref->var, var);
1251
1252 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1253 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1254 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1255 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1256 }
1257
1258 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
1259 {
1260 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1261 nir_deref_instr *deref = nir_build_deref_var(b, var);
1262
1263 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1264 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1265 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1266
1267 nir_validate_shader(b->shader, NULL);
1268 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1269 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1270
1271 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1272
1273 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1274 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1275
1276 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1277 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1278 ASSERT_EQ(load->dest.ssa.num_components, 1);
1279
1280 deref = nir_src_as_deref(load->src[0]);
1281 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1282 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1283
1284 deref = nir_deref_instr_parent(deref);
1285 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1286 ASSERT_EQ(deref->var, var);
1287
1288 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1289 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1290 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1291 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1292 }
1293
1294 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
1295 {
1296 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1297 nir_deref_instr *deref = nir_build_deref_var(b, var);
1298
1299 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1300 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1301 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1302
1303 nir_validate_shader(b->shader, NULL);
1304 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1305
1306 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1307
1308 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1309 }
1310
1311 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
1312 {
1313 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1314 nir_deref_instr *deref = nir_build_deref_var(b, var);
1315
1316 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1317 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1318 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);
1319
1320 nir_validate_shader(b->shader, NULL);
1321 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1322 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1323
1324 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1325
1326 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1327 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1328
1329 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1330 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1331 ASSERT_EQ(load->dest.ssa.num_components, 2);
1332
1333 deref = nir_src_as_deref(load->src[0]);
1334 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1335
1336 deref = nir_deref_instr_parent(deref);
1337 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1338 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1339
1340 deref = nir_deref_instr_parent(deref);
1341 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1342 ASSERT_EQ(deref->var, var);
1343
1344 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1345 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1346 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1347 ASSERT_EQ(loads[0x3]->swizzle[0], 1);
1348 }
1349
1350 TEST_F(nir_load_store_vectorize_test, shared_load_bool)
1351 {
1352 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1353 nir_deref_instr *deref = nir_build_deref_var(b, var);
1354
1355 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
1356 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);
1357
1358 nir_validate_shader(b->shader, NULL);
1359 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1360
1361 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1362
1363 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1364
1365 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1366 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1367 ASSERT_EQ(load->dest.ssa.num_components, 2);
1368
1369 deref = nir_src_as_deref(load->src[0]);
1370 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1371
1372 deref = nir_deref_instr_parent(deref);
1373 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1374 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1375
1376 deref = nir_deref_instr_parent(deref);
1377 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1378 ASSERT_EQ(deref->var, var);
1379
1380 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1381 ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_i2b1));
1382 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1383 ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->dest.ssa, 1));
1384 }
1385
1386 TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
1387 {
1388 glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1389 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1390
1391 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1392 nir_deref_instr *deref = nir_build_deref_var(b, var);
1393
1394 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
1395 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1396
1397 nir_validate_shader(b->shader, NULL);
1398 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1399
1400 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1401
1402 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1403
1404 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1405 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1406 ASSERT_EQ(load->dest.ssa.num_components, 2);
1407
1408 deref = nir_src_as_deref(load->src[0]);
1409 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1410
1411 deref = nir_deref_instr_parent(deref);
1412 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1413 ASSERT_EQ(deref->strct.index, 0);
1414
1415 deref = nir_deref_instr_parent(deref);
1416 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1417 ASSERT_EQ(deref->var, var);
1418
1419 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1420 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1421 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1422 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1423 }
1424
1425 TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
1426 {
1427 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1428 nir_deref_instr *deref = nir_build_deref_var(b, var);
1429
1430 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
1431 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1432
1433 nir_validate_shader(b->shader, NULL);
1434 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
1435
1436 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1437
1438 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1439
1440 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
1441 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
1442 nir_ssa_def *val = store->src[1].ssa;
1443 ASSERT_EQ(val->bit_size, 32);
1444 ASSERT_EQ(val->num_components, 2);
1445 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1446 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1447 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1448
1449 deref = nir_src_as_deref(store->src[0]);
1450 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1451
1452 deref = nir_deref_instr_parent(deref);
1453 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1454 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1455
1456 deref = nir_deref_instr_parent(deref);
1457 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1458 ASSERT_EQ(deref->var, var);
1459 }
1460
1461 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
1462 {
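   /* The two loads use different 'base' values (0 and 4), so the pass must not
    * combine them. */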
1463 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1464 nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);
1465
1466 nir_validate_shader(b->shader, NULL);
1467 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1468
1469 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1470
1471 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1472 }
1473
1474 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
1475 {
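   /* The loads read offsets 0 and 8, leaving a 4-byte hole between them, so
    * they are not adjacent and should stay separate. */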
1476 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1477 create_load(nir_var_mem_push_const, 0, 8, 0x2);
1478
1479 nir_validate_shader(b->shader, NULL);
1480 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1481
1482 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1483
1484 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1485 }
1486
1487 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
1488 {
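   /* One load has a constant offset and the other an unrelated indirect offset,
    * so adjacency can't be proven and the loads should stay separate. */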
1489 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1490 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1491 create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);
1492
1493 nir_validate_shader(b->shader, NULL);
1494 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1495
1496 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1497
1498 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1499 }
1500
1501 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
1502 {
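   /* The two offsets differ by a constant 16 bytes, so the 4-byte loads are not
    * adjacent and should stay separate. */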
1503 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1504 create_indirect_load(nir_var_mem_push_const, 0,
1505 nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 2)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x1);
1506 create_indirect_load(nir_var_mem_push_const, 0,
1507 nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 3)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x2);
1508
1509 nir_validate_shader(b->shader, NULL);
1510 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1511
1512 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1513
1514 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1515 }
1516
1517 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
1518 {
1519 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   /* Equivalent GLSL: vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
    * These read adjacent dwords, so the two loads should be combined. */
1521 nir_ssa_def *low = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 12));
1522 nir_ssa_def *high = nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 1)), nir_imm_int(b, 16));
1523 create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
1524 create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);
1525
1526 nir_validate_shader(b->shader, NULL);
1527 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1528
1529 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1530
1531 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
1532
1533 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
1534 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1535 ASSERT_EQ(load->dest.ssa.num_components, 2);
1536 ASSERT_EQ(load->src[0].ssa, low);
1537 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1538 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1539 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1540 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1541 }
1542
1543 TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
1544 {
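   /* The indirect store in between may write offset 0, so the loads on either
    * side of it must not be combined. */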
1545 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1546 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1547 create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
1548 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1549
1550 nir_validate_shader(b->shader, NULL);
1551 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1552
1553 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1554
1555 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1556 }
1557
1558 TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
1559 {
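   /* The store's offset is unrelated to the loads' offset, so aliasing can't be
    * ruled out and the loads must not be combined across the store. */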
1560 nir_ssa_def *load_base = nir_load_global_invocation_index(b, 32);
1561 nir_ssa_def *store_base = nir_load_local_invocation_index(b);
1562 create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
1563 create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
1564 create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);
1565
1566 nir_validate_shader(b->shader, NULL);
1567 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1568
1569 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1570
1571 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1572 }
1573
1574 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
1575 {
   /* TODO: try to combine these loads. The store is at constant offset 0 while
    * the loads are at index_base*16+4, which is always 4 mod 16 even if the
    * arithmetic wraps, so the store can never alias them. */
1577 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1578 nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 4));
1579 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1580 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1581 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1582
1583 nir_validate_shader(b->shader, NULL);
1584 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1585
1586 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1587
1588 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1589
1590 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1591 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1592 ASSERT_EQ(load->dest.ssa.num_components, 1);
1593 ASSERT_EQ(load->src[1].ssa, offset);
1594 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1595 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1596 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1597 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1598 }
1599
1600 TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
1601 {
   /* These loads can't be combined: without nir_alu_instr::no_unsigned_wrap the
    * offset calculation may wrap, so if index_base == 268435455 then
    * offset == 0 and the store at offset 0 could alias both loads
    * (see ssbo_alias4 for the no_unsigned_wrap case). */
1605 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1606 nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
1607 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1608 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1609 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1610
1611 nir_validate_shader(b->shader, NULL);
1612 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1613
1614 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1615
1616 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1617 }
1618
1619 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
1620 {
   /* TODO: try to combine these loads. Unlike ssbo_alias3, no_unsigned_wrap is
    * set on the offset calculation below, so the offset can't wrap around to 0
    * and alias the store. */
1622 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1623 nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
1624 nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
1625 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1626 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1627 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1628
1629 nir_validate_shader(b->shader, NULL);
1630 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1631
1632 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1633
1634 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1635
1636 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1637 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1638 ASSERT_EQ(load->dest.ssa.num_components, 1);
1639 ASSERT_EQ(load->src[1].ssa, offset);
1640 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1641 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1642 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1643 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1644 }
1645
1646 TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
1647 {
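   /* Without ACCESS_RESTRICT, different SSBO bindings may alias, so the store
    * to binding 1 prevents combining the loads from binding 0. */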
1648 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1649 create_store(nir_var_mem_ssbo, 1, 0, 0x2);
1650 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1651
1652 nir_validate_shader(b->shader, NULL);
1653 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1654
1655 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1656
1657 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1658 }
1659
1660 TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
1661 {
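   /* With ACCESS_RESTRICT, the store to binding 1 can't alias binding 0, so the
    * two identical loads should be combined into one. */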
1662 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
1663 create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
1664 create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);
1665
1666 nir_validate_shader(b->shader, NULL);
1667 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1668
1669 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1670
1671 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1672
1673 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1674 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1675 ASSERT_EQ(load->dest.ssa.num_components, 1);
1676 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1677 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1678 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1679 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1680 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1681 }
1682
1683 TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
1684 {
   /* TODO: implement type-based alias analysis so that these loads can be
    * combined. This is made a bit more difficult than simply using
    * nir_compare_derefs() because the vectorizer creates loads/stores with
    * cast derefs. The solution would probably be to keep multiple derefs for
    * an entry (one for each load/store combined into it). */
1690 glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1691 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1692
1693 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1694 nir_deref_instr *deref = nir_build_deref_var(b, var);
1695
1696 nir_ssa_def *index0 = nir_load_local_invocation_index(b);
1697 nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32);
1698 nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);
1699
1700 create_shared_load(load_deref, 0x1);
1701 create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
1702 create_shared_load(load_deref, 0x3);
1703
1704 nir_validate_shader(b->shader, NULL);
1705 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1706
1707 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1708
1709 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1710
1711 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1712 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1713 ASSERT_EQ(load->dest.ssa.num_components, 1);
1714 ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1715 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1716 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1717 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1718 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1719 }
1720
1721 TEST_F(nir_load_store_vectorize_test, shared_alias1)
1722 {
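   /* Distinct shared variables can't alias, so the store to var1 doesn't block
    * combining the two loads of var0 into one. */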
1723 nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
1724 nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
1725 nir_deref_instr *load_deref = nir_build_deref_var(b, var0);
1726
1727 create_shared_load(load_deref, 0x1);
1728 create_shared_store(nir_build_deref_var(b, var1), 0x2);
1729 create_shared_load(load_deref, 0x3);
1730
1731 nir_validate_shader(b->shader, NULL);
1732 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1733
1734 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1735
1736 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1737
1738 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1739 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1740 ASSERT_EQ(load->dest.ssa.num_components, 1);
1741 ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1742 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1743 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1744 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1745 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1746 }
1747
1748 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
1749 {
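   /* The constant offsets are roughly 4 GiB apart, far too distant to be part
    * of one vectorized load, so both loads should remain. */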
1750 create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x100000000, 64), 0x1);
1751 create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x200000004, 64), 0x2);
1752
1753 nir_validate_shader(b->shader, NULL);
1754 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1755
1756 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1757
1758 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1759 }
1760
1761 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
1762 {
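   /* The offsets are index_base*0x100000000 and index_base*0x200000000, so
    * their distance is huge (or unknown), and the loads should not be
    * combined. */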
1763 nir_ssa_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
1764 nir_ssa_def *first = nir_imul_imm(b, index_base, 0x100000000);
1765 nir_ssa_def *second = nir_imul_imm(b, index_base, 0x200000000);
1766 create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
1767 create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);
1768
1769 nir_validate_shader(b->shader, NULL);
1770 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1771
1772 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1773
1774 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1775 }