// src/compiler/nir/tests/load_store_vectorizer_tests.cpp

/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <gtest/gtest.h>

#include "nir.h"
#include "nir_builder.h"

namespace {

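/* Test fixture: builds a simple compute shader with nir_builder, provides
 * helpers that emit UBO/SSBO/push-constant/shared loads and stores tagged
 * with a small integer id, runs nir_opt_load_store_vectorize plus a few
 * cleanup passes, and lets tests inspect how each original access was
 * rewritten.
 */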
class nir_load_store_vectorize_test : public ::testing::Test {
protected:
   nir_load_store_vectorize_test();
   ~nir_load_store_vectorize_test();

   unsigned count_intrinsics(nir_intrinsic_op intrinsic);

   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                      unsigned index);

   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
                       nir_variable_mode robust_modes = (nir_variable_mode)0);

   nir_ssa_def *get_resource(uint32_t binding, bool ssbo);

   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
                                             unsigned access=0);
   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                              uint32_t id, unsigned bit_size=32, unsigned components=1,
                              unsigned wrmask=0xf, unsigned access=0);

   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
                                    unsigned access=0);
   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
                     unsigned access=0);

   void create_shared_load(nir_deref_instr *deref, uint32_t id,
                           unsigned bit_size=32, unsigned components=1);
   void create_shared_store(nir_deref_instr *deref, uint32_t id,
                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);

   bool test_alu(nir_instr *instr, nir_op op);
   bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);

   static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
                                      unsigned num_components, unsigned high_offset,
                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high);
   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);

   void *mem_ctx;

   nir_builder *b;
   std::map<unsigned, nir_alu_src*> loads;
   std::map<unsigned, nir_ssa_def*> res_map;
};

nir_load_store_vectorize_test::nir_load_store_vectorize_test()
{
   glsl_type_singleton_init_or_ref();

   mem_ctx = ralloc_context(NULL);
   static const nir_shader_compiler_options options = { };
   b = rzalloc(mem_ctx, nir_builder);
   nir_builder_init_simple_shader(b, mem_ctx, MESA_SHADER_COMPUTE, &options);
}

nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
{
   if (HasFailure()) {
      printf("\nShader from the failed test:\n\n");
      nir_print_shader(b->shader, stdout);
   }

   ralloc_free(mem_ctx);

   glsl_type_singleton_decref();
}

unsigned
nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
{
   unsigned count = 0;
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic)
            count++;
      }
   }
   return count;
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
                                             unsigned index)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic) {
            if (index == 0)
               return intrin;
            index--;
         }
      }
   }
   return NULL;
}

bool
nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
                                              bool cse,
                                              nir_variable_mode robust_modes)
{
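   /* Shared-memory tests use derefs, so give those variables explicit types
    * first; then run the vectorizer and, on progress, validate and run a few
    * cleanup passes so the tests can match the simplified output. */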
   if (modes & nir_var_mem_shared)
      nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback, robust_modes);
   if (progress) {
      nir_validate_shader(b->shader, NULL);
      if (cse)
         nir_opt_cse(b->shader);
      nir_copy_prop(b->shader);
      nir_opt_algebraic(b->shader);
      nir_opt_constant_folding(b->shader);
   }
   return progress;
}

nir_ssa_def *
nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
{
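   /* Create (and cache per binding) a vulkan_resource_index intrinsic that the
    * UBO/SSBO helpers use as their buffer-index source. */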
   if (res_map.count(binding))
      return res_map[binding];

   nir_intrinsic_instr *res = nir_intrinsic_instr_create(
      b->shader, nir_intrinsic_vulkan_resource_index);
   nir_ssa_dest_init(&res->instr, &res->dest, 1, 32, NULL);
   res->num_components = 1;
   res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
   nir_intrinsic_set_desc_type(
      res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
   nir_intrinsic_set_desc_set(res, 0);
   nir_intrinsic_set_binding(res, binding);
   nir_builder_instr_insert(b, &res->instr);
   res_map[binding] = &res->dest.ssa;
   return &res->dest.ssa;
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_indirect_load(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ubo:
      intrinsic = nir_intrinsic_load_ubo;
      res = get_resource(binding, false);
      break;
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_load_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_push_const:
      intrinsic = nir_intrinsic_load_push_constant;
      break;
   default:
      return NULL;
   }
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   if (res) {
      load->src[0] = nir_src_for_ssa(res);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }
   if (mode != nir_var_mem_push_const) {
      nir_intrinsic_set_align(load, (bit_size == 1 ? 32 : bit_size) / 8, 0);
      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
   }
   nir_builder_instr_insert(b, &load->instr);
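   /* Read the result through a mov so each test can later check which
    * component (swizzle) of the vectorized load the access with this id was
    * rewritten to use. */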
   nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr;
   loads[id] = &nir_instr_as_alu(mov)->src[0];

   return load;
}

void
nir_load_store_vectorize_test::create_indirect_store(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
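   /* Stored values encode the store id in the upper nibble and the component
    * index in the lower one (id 0x2, component 1 -> 0x21), so asserts on the
    * vectorized constant can tell which original store each lane came from. */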
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_store_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_shared:
      intrinsic = nir_intrinsic_store_shared;
      break;
   default:
      return;
   }
   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   if (res) {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(res);
      store->src[2] = nir_src_for_ssa(offset);
   } else {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
   }
   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_load(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
}

void
nir_load_store_vectorize_test::create_store(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
}

void nir_load_store_vectorize_test::create_shared_load(
   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
{
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   nir_builder_instr_insert(b, &load->instr);
   nir_instr *mov = nir_mov(b, &load->dest.ssa)->parent_instr;
   loads[id] = &nir_instr_as_alu(mov)->src[0];
}

void nir_load_store_vectorize_test::create_shared_store(
   nir_deref_instr *deref, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_deref);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   store->src[1] = nir_src_for_ssa(value);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
{
   return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
}

bool nir_load_store_vectorize_test::test_alu_def(
   nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (index >= nir_op_infos[alu->op].num_inputs)
      return false;
   if (alu->src[index].src.ssa != def)
      return false;
   if (alu->src[index].swizzle[0] != swizzle)
      return false;

   return true;
}

bool nir_load_store_vectorize_test::mem_vectorize_callback(
   unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high)
{
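   /* Allow vectorization for any access of at least 8 bits, regardless of
    * alignment or component count (the nonzero value converts to true). */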
   return bit_size / 8;
}

void nir_load_store_vectorize_test::shared_type_info(
   const struct glsl_type *type, unsigned *size, unsigned *align)
{
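   /* Scalar/vector layout for shared variables: booleans take 32 bits,
    * everything else its natural size, aligned to the component size. */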
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}
} // namespace

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
   ASSERT_EQ(loads[0x2]->swizzle[1], 2);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 0);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_large)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base_prev);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
{
   nir_ssa_def *inv = nir_load_local_invocation_index(b);
   nir_ssa_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
   nir_ssa_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
   nir_ssa_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);

   /* nir_opt_algebraic optimizes the imul */
   ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
   nir_ssa_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
   nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
   ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
   ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0, 0x2);
   create_store(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

/* If nir_opt_load_store_vectorize were implemented the way many load/store
 * optimization passes are (for example, nir_opt_combine_stores and
 * nir_opt_copy_prop_vars) and stopped tracking a load once an aliasing store
 * is encountered, this case wouldn't be optimized.
 * A similar test for derefs is shared_load_adjacent_store_identical. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 4, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

/* nir_intrinsic_control_barrier only syncs invocations in a workgroup; it
 * doesn't require that loads/stores complete.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_control_barrier)->instr);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_shared);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

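   /* The three loads become one 8-bit vec4; the 16-bit load's value is
    * reconstructed from components 2 and 3 via u2u16, ishl and ior. */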
   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 8);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);

   nir_ssa_def *val = loads[0x3]->src.ssa;
   ASSERT_EQ(val->bit_size, 16);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
   nir_ssa_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
   nir_ssa_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
   high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->dest.ssa, 2));
   ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->dest.ssa, 3));
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

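   /* Both loads merge into one 32-bit vec4; the 64-bit value is rebuilt from
    * components 2 and 3 with pack_64_2x32. */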
   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa);
   ASSERT_EQ(pack->src[0].swizzle[0], 2);
   ASSERT_EQ(pack->src[0].swizzle[1], 3);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

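   /* With CSE enabled everything merges into one 64-bit vec3 load; the pair of
    * 32-bit values is recovered from component 0 with unpack_64_2x32. */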
   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 64);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->swizzle[0], 2);

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_mov));
   nir_alu_instr *mov = nir_instr_as_alu(val->parent_instr);
   ASSERT_EQ(mov->src[0].src.ssa, &load->dest.ssa);
   ASSERT_EQ(mov->src[0].swizzle[0], 1);

   val = loads[0x1]->src.ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
   nir_alu_instr *unpack = nir_instr_as_alu(val->parent_instr);
   ASSERT_EQ(unpack->src[0].src.ssa, &load->dest.ssa);
   ASSERT_EQ(unpack->src[0].swizzle[0], 0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   ASSERT_EQ(pack->src[0].src.ssa, &load->dest.ssa);
   ASSERT_EQ(pack->src[0].swizzle[0], 1);
   ASSERT_EQ(pack->src[0].swizzle[1], 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 8);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

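   /* The vectorized access goes through a deref cast on top of the original
    * var[0] array deref; walk cast -> array -> var to verify the chain. */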
   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 64, NULL);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);

   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(deref->arr.index.ssa, index_base);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);

   create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(deref->arr.index.ssa, index_base_prev);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, shared_load_struct)
{
   glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};

   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
   ASSERT_EQ(deref->strct.index, 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 0);
}

TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x3]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, shared_load_bool)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

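   /* Booleans were given 32-bit storage by shared_type_info, so the vectorized
    * load is a 32-bit vec2 and each value is converted back with i2b1. */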
1364 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1365 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1366 ASSERT_EQ(load->dest.ssa.num_components, 2);
1367
1368 deref = nir_src_as_deref(load->src[0]);
1369 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1370
1371 deref = nir_deref_instr_parent(deref);
1372 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1373 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1374
1375 deref = nir_deref_instr_parent(deref);
1376 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1377 ASSERT_EQ(deref->var, var);
1378
1379 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1380 ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_i2b1));
1381 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1382 ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->dest.ssa, 1));
1383 }
1384
1385 TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
1386 {
1387 glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1388 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1389
1390 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1391 nir_deref_instr *deref = nir_build_deref_var(b, var);
1392
1393 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
1394 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1395
1396 nir_validate_shader(b->shader, NULL);
1397 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1398
1399 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1400
1401 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1402
1403 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1404 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1405 ASSERT_EQ(load->dest.ssa.num_components, 2);
1406
1407 deref = nir_src_as_deref(load->src[0]);
1408 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1409
1410 deref = nir_deref_instr_parent(deref);
1411 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1412 ASSERT_EQ(deref->strct.index, 0);
1413
1414 deref = nir_deref_instr_parent(deref);
1415 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1416 ASSERT_EQ(deref->var, var);
1417
1418 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1419 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1420 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1421 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1422 }
1423
1424 TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
1425 {
1426 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1427 nir_deref_instr *deref = nir_build_deref_var(b, var);
1428
1429 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
1430 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1431
1432 nir_validate_shader(b->shader, NULL);
1433 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
1434
1435 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1436
1437 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1438
1439 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
1440 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
1441 nir_ssa_def *val = store->src[1].ssa;
1442 ASSERT_EQ(val->bit_size, 32);
1443 ASSERT_EQ(val->num_components, 2);
1444 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1445 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1446 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1447
1448 deref = nir_src_as_deref(store->src[0]);
1449 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1450
1451 deref = nir_deref_instr_parent(deref);
1452 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1453 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1454
1455 deref = nir_deref_instr_parent(deref);
1456 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1457 ASSERT_EQ(deref->var, var);
1458 }
1459
1460 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
1461 {
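/* The second load adds base=4 on top of its offset of 4, placing it at byte 8;
 * the loads are not adjacent and must not be vectorized. */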
1462 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1463 nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);
1464
1465 nir_validate_shader(b->shader, NULL);
1466 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1467
1468 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1469
1470 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1471 }
1472
1473 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
1474 {
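/* Offsets 0 and 8 leave a 4-byte hole between the two 32-bit loads, so they
 * are not adjacent and must stay separate. */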
1475 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1476 create_load(nir_var_mem_push_const, 0, 8, 0x2);
1477
1478 nir_validate_shader(b->shader, NULL);
1479 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1480
1481 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1482
1483 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1484 }
1485
1486 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
1487 {
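/* One load has a constant offset, the other a fully dynamic one; adjacency
 * cannot be proven, so nothing is vectorized. */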
1488 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1489 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1490 create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);
1491
1492 nir_validate_shader(b->shader, NULL);
1493 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1494
1495 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1496
1497 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1498 }
1499
1500 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
1501 {
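/* The offsets are (i + 2) * 16 + 32 and (i + 3) * 16 + 32, i.e. 16 bytes
 * apart, so the two 32-bit loads are not adjacent. */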
1502 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1503 create_indirect_load(nir_var_mem_push_const, 0,
1504 nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 2)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x1);
1505 create_indirect_load(nir_var_mem_push_const, 0,
1506 nir_iadd(b, nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 3)), nir_imm_int(b, 16)), nir_imm_int(b, 32)), 0x2);
1507
1508 nir_validate_shader(b->shader, NULL);
1509 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1510
1511 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1512
1513 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1514 }
1515
1516 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
1517 {
1518 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1519 /* vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x; */
1520 nir_ssa_def *low = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 12));
1521 nir_ssa_def *high = nir_imul(b, nir_iadd(b, index_base, nir_imm_int(b, 1)), nir_imm_int(b, 16));
1522 create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
1523 create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);
1524
1525 nir_validate_shader(b->shader, NULL);
1526 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1527
1528 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1529
1530 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
1531
1532 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
1533 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1534 ASSERT_EQ(load->dest.ssa.num_components, 2);
1535 ASSERT_EQ(load->src[0].ssa, low);
1536 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1537 ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
1538 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1539 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
1540 }
1541
1542 TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
1543 {
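/* The indirect store in between may overwrite offset 0, so the two loads at
 * offset 0 must not be combined across it. */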
1544 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1545 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1546 create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
1547 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1548
1549 nir_validate_shader(b->shader, NULL);
1550 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1551
1552 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1553
1554 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1555 }
1556
1557 TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
1558 {
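/* The store uses an unrelated indirect offset that may alias the loads'
 * offset, so the loads must stay separate. */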
1559 nir_ssa_def *load_base = nir_load_global_invocation_index(b, 32);
1560 nir_ssa_def *store_base = nir_load_local_invocation_index(b);
1561 create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
1562 create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
1563 create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);
1564
1565 nir_validate_shader(b->shader, NULL);
1566 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1567
1568 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1569
1570 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1571 }
1572
1573 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
1574 {
1575 /* TODO: try to combine these loads */
1576 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1577 nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 4));
1578 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1579 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1580 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1581
1582 nir_validate_shader(b->shader, NULL);
1583 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1584
1585 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1586
1587 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1588
1589 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1590 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1591 ASSERT_EQ(load->dest.ssa.num_components, 1);
1592 ASSERT_EQ(load->src[1].ssa, offset);
1593 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1594 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1595 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1596 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1597 }
1598
1599 TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
1600 {
1601 /* these loads could be combined if nir_alu_instr::no_unsigned_wrap were set.
1602 * Without it they can't be: if index_base == 268435455, the addition wraps
1603 * around and offset == 0, so the store at offset 0 may alias the loads. */
1604 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1605 nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
1606 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1607 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1608 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1609
1610 nir_validate_shader(b->shader, NULL);
1611 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1612
1613 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1614
1615 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1616 }
1617
1618 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
1619 {
1620 /* TODO: try to combine these loads */
1621 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1622 nir_ssa_def *offset = nir_iadd(b, nir_imul(b, index_base, nir_imm_int(b, 16)), nir_imm_int(b, 16));
1623 nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
1624 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1625 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1626 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1627
1628 nir_validate_shader(b->shader, NULL);
1629 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1630
1631 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1632
1633 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1634
1635 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1636 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1637 ASSERT_EQ(load->dest.ssa.num_components, 1);
1638 ASSERT_EQ(load->src[1].ssa, offset);
1639 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1640 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1641 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1642 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1643 }
1644
1645 TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
1646 {
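/* Without ACCESS_RESTRICT, binding 1 may refer to the same buffer as
 * binding 0, so the store may alias and the loads must not be combined. */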
1647 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1648 create_store(nir_var_mem_ssbo, 1, 0, 0x2);
1649 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1650
1651 nir_validate_shader(b->shader, NULL);
1652 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1653
1654 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1655
1656 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1657 }
1658
1659 TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
1660 {
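/* Same as ssbo_alias5, but every access is ACCESS_RESTRICT, so the store to
 * binding 1 cannot alias binding 0 and the identical loads collapse into one. */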
1661 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
1662 create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
1663 create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);
1664
1665 nir_validate_shader(b->shader, NULL);
1666 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1667
1668 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1669
1670 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1671
1672 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1673 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1674 ASSERT_EQ(load->dest.ssa.num_components, 1);
1675 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1676 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1677 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1678 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1679 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1680 }
1681
1682 TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
1683 {
1684 /* TODO: implement type-based alias analysis so that these loads can be
1685 * combined. This is made a bit more difficult than simply using
1686 * nir_compare_derefs() because the vectorizer creates loads/stores with
1687 * casted derefs. The solution would probably be to keep multiple derefs for
1688 * an entry (one for each load/store combined into it). */
1689 glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1690 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1691
1692 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1693 nir_deref_instr *deref = nir_build_deref_var(b, var);
1694
1695 nir_ssa_def *index0 = nir_load_local_invocation_index(b);
1696 nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32);
1697 nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);
1698
1699 create_shared_load(load_deref, 0x1);
1700 create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
1701 create_shared_load(load_deref, 0x3);
1702
1703 nir_validate_shader(b->shader, NULL);
1704 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1705
1706 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1707
1708 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1709
1710 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1711 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1712 ASSERT_EQ(load->dest.ssa.num_components, 1);
1713 ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1714 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1715 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1716 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1717 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1718 }
1719
1720 TEST_F(nir_load_store_vectorize_test, shared_alias1)
1721 {
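/* The store targets a different shared variable, which cannot alias var0, so
 * the two loads of var0 are combined into one. */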
1722 nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
1723 nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
1724 nir_deref_instr *load_deref = nir_build_deref_var(b, var0);
1725
1726 create_shared_load(load_deref, 0x1);
1727 create_shared_store(nir_build_deref_var(b, var1), 0x2);
1728 create_shared_load(load_deref, 0x3);
1729
1730 nir_validate_shader(b->shader, NULL);
1731 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1732
1733 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1734
1735 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1736
1737 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1738 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1739 ASSERT_EQ(load->dest.ssa.num_components, 1);
1740 ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1741 ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
1742 ASSERT_EQ(loads[0x3]->src.ssa, &load->dest.ssa);
1743 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
1744 ASSERT_EQ(loads[0x3]->swizzle[0], 0);
1745 }
1746
1747 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
1748 {
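/* The 64-bit constant offsets differ by 0x100000004 bytes; truncating that
 * distance to 32 bits would make the loads look adjacent, so they must stay
 * separate. */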
1749 create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x100000000, 64), 0x1);
1750 create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_intN_t(b, 0x200000004, 64), 0x2);
1751
1752 nir_validate_shader(b->shader, NULL);
1753 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1754
1755 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1756
1757 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1758 }
1759
1760 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
1761 {
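/* The two indirect offsets scale index_base by different 64-bit factors, so
 * the vectorizer cannot prove the loads are adjacent and must leave them
 * alone. */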
1762 nir_ssa_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
1763 nir_ssa_def *first = nir_imul_imm(b, index_base, 0x100000000);
1764 nir_ssa_def *second = nir_imul_imm(b, index_base, 0x200000000);
1765 create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
1766 create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);
1767
1768 nir_validate_shader(b->shader, NULL);
1769 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1770
1771 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1772
1773 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1774 }
1775
1776 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
1777 {
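/* Offsets 0xfffffffc and 0x0 would only be adjacent if the 32-bit offset
 * wrapped around; with robust access enabled for SSBOs the loads must not be
 * combined. */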
1778 create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
1779 create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);
1780
1781 nir_validate_shader(b->shader, NULL);
1782 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1783
1784 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1785
1786 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1787 }