nir/load_store_vectorizer: Use more imm helpers in the tests.
[mesa.git] src/compiler/nir/tests/load_store_vectorizer_tests.cpp
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <gtest/gtest.h>

#include "nir.h"
#include "nir_builder.h"

/* This is a macro so you get good line numbers. It checks that the mov
 * recorded for a test id reads its value from the vectorized load with the
 * expected swizzle.
 */
#define EXPECT_INSTR_SWIZZLES(instr, load, expected_swizzle) \
   EXPECT_EQ((instr)->src[0].src.ssa, &(load)->dest.ssa); \
   EXPECT_EQ(swizzle(instr, 0), expected_swizzle);

namespace {

class nir_load_store_vectorize_test : public ::testing::Test {
protected:
   nir_load_store_vectorize_test();
   ~nir_load_store_vectorize_test();

   unsigned count_intrinsics(nir_intrinsic_op intrinsic);

   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                      unsigned index);

   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
                       nir_variable_mode robust_modes = (nir_variable_mode)0);

   nir_ssa_def *get_resource(uint32_t binding, bool ssbo);

   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
                                             unsigned access=0);
   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                              uint32_t id, unsigned bit_size=32, unsigned components=1,
                              unsigned wrmask=0xf, unsigned access=0);

   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
                                    unsigned access=0);
   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
                     unsigned access=0);

   void create_shared_load(nir_deref_instr *deref, uint32_t id,
                           unsigned bit_size=32, unsigned components=1);
   void create_shared_store(nir_deref_instr *deref, uint32_t id,
                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);

   bool test_alu(nir_instr *instr, nir_op op);
   bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);

   static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
                                      unsigned num_components, unsigned high_offset,
                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high);
   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);

   std::string swizzle(nir_alu_instr *instr, int src);

   void *mem_ctx;

   nir_builder *b;
   std::map<unsigned, nir_alu_instr*> movs;
   std::map<unsigned, nir_alu_src*> loads;
   std::map<unsigned, nir_ssa_def*> res_map;
};

nir_load_store_vectorize_test::nir_load_store_vectorize_test()
{
   glsl_type_singleton_init_or_ref();

   mem_ctx = ralloc_context(NULL);
   static const nir_shader_compiler_options options = { };
   b = rzalloc(mem_ctx, nir_builder);
   nir_builder_init_simple_shader(b, mem_ctx, MESA_SHADER_COMPUTE, &options);
}

nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
{
   if (HasFailure()) {
      printf("\nShader from the failed test:\n\n");
      nir_print_shader(b->shader, stdout);
   }

   ralloc_free(mem_ctx);

   glsl_type_singleton_decref();
}

std::string
nir_load_store_vectorize_test::swizzle(nir_alu_instr *instr, int src)
{
   std::string swizzle;
   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(instr, src); i++) {
      swizzle += "xyzw"[instr->src[src].swizzle[i]];
   }

   return swizzle;
}

unsigned
nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
{
   unsigned count = 0;
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic)
            count++;
      }
   }
   return count;
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
                                             unsigned index)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic) {
            if (index == 0)
               return intrin;
            index--;
         }
      }
   }
   return NULL;
}

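/* Runs the vectorizer on the shader built so far. Shared-memory variables are
 * lowered to explicit types first, and on progress a few cleanup passes
 * (optionally CSE, then copy-prop, algebraic and constant folding) run so the
 * tests can inspect the resulting pack/unpack ALU chains.
 */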
bool
nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
                                              bool cse,
                                              nir_variable_mode robust_modes)
{
   if (modes & nir_var_mem_shared)
      nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback, robust_modes);
   if (progress) {
      nir_validate_shader(b->shader, NULL);
      if (cse)
         nir_opt_cse(b->shader);
      nir_copy_prop(b->shader);
      nir_opt_algebraic(b->shader);
      nir_opt_constant_folding(b->shader);
   }
   return progress;
}

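/* Returns (and caches) a vulkan_resource_index for the given binding so all
 * loads/stores created for that binding share the same resource def.
 */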
nir_ssa_def *
nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
{
   if (res_map.count(binding))
      return res_map[binding];

   nir_intrinsic_instr *res = nir_intrinsic_instr_create(
      b->shader, nir_intrinsic_vulkan_resource_index);
   nir_ssa_dest_init(&res->instr, &res->dest, 1, 32, NULL);
   res->num_components = 1;
   res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
   nir_intrinsic_set_desc_type(
      res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
   nir_intrinsic_set_desc_set(res, 0);
   nir_intrinsic_set_binding(res, binding);
   nir_builder_instr_insert(b, &res->instr);
   res_map[binding] = &res->dest.ssa;
   return &res->dest.ssa;
}

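/* Builds a load intrinsic for the given mode at an SSA offset and records a
 * mov of its result under "id", so tests can later check which components of
 * a vectorized load it reads (see EXPECT_INSTR_SWIZZLES).
 */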
nir_intrinsic_instr *
nir_load_store_vectorize_test::create_indirect_load(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ubo:
      intrinsic = nir_intrinsic_load_ubo;
      res = get_resource(binding, false);
      break;
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_load_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_push_const:
      intrinsic = nir_intrinsic_load_push_constant;
      break;
   default:
      return NULL;
   }
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   if (res) {
      load->src[0] = nir_src_for_ssa(res);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }
   if (mode != nir_var_mem_push_const) {
      nir_intrinsic_set_align(load, (bit_size == 1 ? 32 : bit_size) / 8, 0);
      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
   }
   nir_builder_instr_insert(b, &load->instr);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];

   return load;
}

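/* Builds a store intrinsic whose value is an immediate vector with component
 * i equal to (id << 4) | i, letting tests identify which original store each
 * lane of a vectorized store came from.
 */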
void
nir_load_store_vectorize_test::create_indirect_store(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_store_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_shared:
      intrinsic = nir_intrinsic_store_shared;
      break;
   default:
      return;
   }
   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   if (res) {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(res);
      store->src[2] = nir_src_for_ssa(offset);
   } else {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
   }
   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_load(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
}

void
nir_load_store_vectorize_test::create_store(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
}

void nir_load_store_vectorize_test::create_shared_load(
   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
{
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   nir_builder_instr_insert(b, &load->instr);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];
}

void nir_load_store_vectorize_test::create_shared_store(
   nir_deref_instr *deref, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_deref);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   store->src[1] = nir_src_for_ssa(value);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
{
   return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
}

bool nir_load_store_vectorize_test::test_alu_def(
   nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (index >= nir_op_infos[alu->op].num_inputs)
      return false;
   if (alu->src[index].src.ssa != def)
      return false;
   if (alu->src[index].swizzle[0] != swizzle)
      return false;

   return true;
}

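/* Vectorization callback: returns a nonzero value for every candidate, so any
 * merge the pass proposes is accepted.
 */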
bool nir_load_store_vectorize_test::mem_vectorize_callback(
   unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high)
{
   return bit_size / 8;
}

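/* Type callback for nir_lower_vars_to_explicit_types: scalars and vectors are
 * packed with natural alignment, booleans as 32-bit values.
 */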
void nir_load_store_vectorize_test::shared_type_info(
   const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length,
   *align = comp_size;
}
} // namespace

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_large)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base_prev);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

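/* The offsets are (inv + 1) * -4 and inv * -4, so the loads are 4 bytes apart
 * with the first one at the lower address and can still be vectorized.
 */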
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
{
   nir_ssa_def *inv = nir_load_local_invocation_index(b);
   nir_ssa_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
   nir_ssa_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
   nir_ssa_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   /* nir_opt_algebraic optimizes the imul */
   ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
   nir_ssa_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
   nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
   ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
   ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0, 0x2);
   create_store(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

/* If nir_opt_load_store_vectorize were implemented the way many load/store
 * optimization passes are (for example, nir_opt_combine_stores and
 * nir_opt_copy_prop_vars) and stopped tracking a load when an aliasing store
 * is encountered, this case wouldn't be optimized.
 * A similar test for derefs is shared_load_adjacent_store_identical. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 4, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

/* nir_intrinsic_control_barrier only syncs invocations in a workgroup; it
 * doesn't require that loads/stores complete.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_control_barrier)->instr);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_shared);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

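/* Mixed bit sizes: the three loads are merged into one 8-bit vec4 load and the
 * original 16-bit value is rebuilt from the two high bytes via u2u16/ishl/ior.
 */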
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 8);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   nir_ssa_def *val = loads[0x3]->src.ssa;
   ASSERT_EQ(val->bit_size, 16);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
   nir_ssa_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
   nir_ssa_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
   high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->dest.ssa, 2));
   ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->dest.ssa, 3));
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(pack, load, "zw");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 64);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "z");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_mov));
   nir_alu_instr *mov = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(mov, load, "y");

   val = loads[0x1]->src.ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
   nir_alu_instr *unpack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(unpack, load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(pack, load, "yz");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 8);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
}

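/* The shared-memory tests work on derefs. run_vectorizer lowers the variables
 * to explicit types first; after vectorization the combined access sits behind
 * a cast deref of the first element's deref.
 */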
1076 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
1077 {
1078 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1079 nir_deref_instr *deref = nir_build_deref_var(b, var);
1080
1081 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1082 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);
1083
1084 nir_validate_shader(b->shader, NULL);
1085 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1086
1087 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1088
1089 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1090
1091 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1092 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1093 ASSERT_EQ(load->dest.ssa.num_components, 2);
1094
1095 deref = nir_src_as_deref(load->src[0]);
1096 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1097
1098 deref = nir_deref_instr_parent(deref);
1099 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1100 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1101
1102 deref = nir_deref_instr_parent(deref);
1103 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1104 ASSERT_EQ(deref->var, var);
1105
1106 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1107 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1108 }
1109
1110 TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
1111 {
1112 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1113 nir_deref_instr *deref = nir_build_deref_var(b, var);
1114 nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 64, NULL);
1115
1116 create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
1117 create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);
1118
1119 nir_validate_shader(b->shader, NULL);
1120 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1121
1122 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1123
1124 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1125 }
1126
1127 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
1128 {
1129 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1130 nir_deref_instr *deref = nir_build_deref_var(b, var);
1131 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1132
1133 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
1134 create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);
1135
1136 nir_validate_shader(b->shader, NULL);
1137 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1138
1139 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1140
1141 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1142
1143 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1144 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1145 ASSERT_EQ(load->dest.ssa.num_components, 2);
1146
1147 deref = nir_src_as_deref(load->src[0]);
1148 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1149
1150 deref = nir_deref_instr_parent(deref);
1151 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1152 ASSERT_EQ(deref->arr.index.ssa, index_base);
1153
1154 deref = nir_deref_instr_parent(deref);
1155 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1156 ASSERT_EQ(deref->var, var);
1157
1158 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1159 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1160 }
1161
1162 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
1163 {
1164 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1165 nir_deref_instr *deref = nir_build_deref_var(b, var);
1166 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1167 nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);
1168
1169 create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
1170 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);
1171
1172 nir_validate_shader(b->shader, NULL);
1173 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1174
1175 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1176
1177 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1178
1179 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1180 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1181 ASSERT_EQ(load->dest.ssa.num_components, 2);
1182
1183 deref = nir_src_as_deref(load->src[0]);
1184 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1185
1186 deref = nir_deref_instr_parent(deref);
1187 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1188 ASSERT_EQ(deref->arr.index.ssa, index_base_prev);
1189
1190 deref = nir_deref_instr_parent(deref);
1191 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1192 ASSERT_EQ(deref->var, var);
1193
1194 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1195 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1196 }
1197
1198 TEST_F(nir_load_store_vectorize_test, shared_load_struct)
1199 {
1200 glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1201 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1202
1203 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1204 nir_deref_instr *deref = nir_build_deref_var(b, var);
1205
1206 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
1207 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1208
1209 nir_validate_shader(b->shader, NULL);
1210 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1211
1212 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1213
1214 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1215
1216 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1217 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1218 ASSERT_EQ(load->dest.ssa.num_components, 2);
1219
1220 deref = nir_src_as_deref(load->src[0]);
1221 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1222
1223 deref = nir_deref_instr_parent(deref);
1224 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1225 ASSERT_EQ(deref->strct.index, 0);
1226
1227 deref = nir_deref_instr_parent(deref);
1228 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1229 ASSERT_EQ(deref->var, var);
1230
1231 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1232 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1233 }
1234
1235 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
1236 {
1237 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1238 nir_deref_instr *deref = nir_build_deref_var(b, var);
1239
1240 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1241 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1242 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1243
1244 nir_validate_shader(b->shader, NULL);
1245 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1246 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1247
1248 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1249
1250 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1251 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1252
1253 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1254 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1255 ASSERT_EQ(load->dest.ssa.num_components, 1);
1256
1257 deref = nir_src_as_deref(load->src[0]);
1258 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1259 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1260
1261 deref = nir_deref_instr_parent(deref);
1262 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1263 ASSERT_EQ(deref->var, var);
1264
1265 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1266 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1267 }
1268
1269 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
1270 {
1271 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1272 nir_deref_instr *deref = nir_build_deref_var(b, var);
1273
1274 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1275 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1276 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1277
1278 nir_validate_shader(b->shader, NULL);
1279 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1280
1281 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1282
1283 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1284 }
1285
1286 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
1287 {
1288 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1289 nir_deref_instr *deref = nir_build_deref_var(b, var);
1290
1291 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1292 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1293 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);
1294
1295 nir_validate_shader(b->shader, NULL);
1296 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1297 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1298
1299 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1300
1301 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1302 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1303
1304 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1305 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1306 ASSERT_EQ(load->dest.ssa.num_components, 2);
1307
1308 deref = nir_src_as_deref(load->src[0]);
1309 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1310
1311 deref = nir_deref_instr_parent(deref);
1312 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1313 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1314
1315 deref = nir_deref_instr_parent(deref);
1316 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1317 ASSERT_EQ(deref->var, var);
1318
1319 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1320 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1321 }
1322
1323 TEST_F(nir_load_store_vectorize_test, shared_load_bool)
1324 {
1325 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1326 nir_deref_instr *deref = nir_build_deref_var(b, var);
1327
1328 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
1329 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);
1330
1331 nir_validate_shader(b->shader, NULL);
1332 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1333
1334 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1335
1336 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1337
1338 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1339 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1340 ASSERT_EQ(load->dest.ssa.num_components, 2);
1341
1342 deref = nir_src_as_deref(load->src[0]);
1343 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1344
1345 deref = nir_deref_instr_parent(deref);
1346 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1347 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1348
1349 deref = nir_deref_instr_parent(deref);
1350 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1351 ASSERT_EQ(deref->var, var);
1352
1353 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1354 ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_i2b1));
1355 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1356 ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->dest.ssa, 1));
1357 }
1358
1359 TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
1360 {
1361 glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1362 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1363
1364 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1365 nir_deref_instr *deref = nir_build_deref_var(b, var);
1366
1367 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
1368 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1369
1370 nir_validate_shader(b->shader, NULL);
1371 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1372
1373 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1374
1375 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1376
1377 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1378 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1379 ASSERT_EQ(load->dest.ssa.num_components, 2);
1380
1381 deref = nir_src_as_deref(load->src[0]);
1382 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1383
1384 deref = nir_deref_instr_parent(deref);
1385 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1386 ASSERT_EQ(deref->strct.index, 0);
1387
1388 deref = nir_deref_instr_parent(deref);
1389 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1390 ASSERT_EQ(deref->var, var);
1391
1392 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1393 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1394
1395 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1396 }
1397
1398 TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
1399 {
1400 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1401 nir_deref_instr *deref = nir_build_deref_var(b, var);
1402
1403 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
1404 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1405
1406 nir_validate_shader(b->shader, NULL);
1407 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
1408
1409 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1410
1411 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1412
1413 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
1414 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
1415 nir_ssa_def *val = store->src[1].ssa;
1416 ASSERT_EQ(val->bit_size, 32);
1417 ASSERT_EQ(val->num_components, 2);
1418 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1419 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1420 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1421
1422 deref = nir_src_as_deref(store->src[0]);
1423 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1424
1425 deref = nir_deref_instr_parent(deref);
1426 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1427 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1428
1429 deref = nir_deref_instr_parent(deref);
1430 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1431 ASSERT_EQ(deref->var, var);
1432 }
1433
1434 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
1435 {
1436 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1437 nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);
1438
1439 nir_validate_shader(b->shader, NULL);
1440 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1441
1442 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1443
1444 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1445 }
1446
1447 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
1448 {
1449 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1450 create_load(nir_var_mem_push_const, 0, 8, 0x2);
1451
1452 nir_validate_shader(b->shader, NULL);
1453 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1454
1455 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1456
1457 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1458 }
1459
1460 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
1461 {
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
{
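   /* The offsets are (i + 2) * 16 + 32 and (i + 3) * 16 + 32, i.e. 16 bytes
    * apart, so the two 4-byte loads are not adjacent. */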
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_push_const, 0,
                        nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 2), 16), 32), 0x1);
   create_indirect_load(nir_var_mem_push_const, 0,
                        nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 3), 16), 32), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   //vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
   nir_ssa_def *low = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 12);
   nir_ssa_def *high = nir_imul_imm(b, nir_iadd_imm(b, index_base, 1), 16);
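   /* low = i * 16 + 12 and high = (i + 1) * 16 = i * 16 + 16 differ by exactly
    * 4 bytes, so the pass should prove adjacency and emit a single vec2 load. */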
   create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
   create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[0].ssa, low);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
{
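   /* The store in between uses a dynamic offset that may alias offset 0, so
    * the two identical loads must not be combined across it. */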
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
{
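   /* The load and store offsets come from unrelated sources, so aliasing
    * can't be ruled out and the loads stay separate. */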
   nir_ssa_def *load_base = nir_load_global_invocation_index(b, 32);
   nir_ssa_def *store_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
   create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
{
   /* TODO: try to combine these loads */
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 4);
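   /* offset is always congruent to 4 modulo 16, even if the multiplication
    * wraps, so it can never equal 0 and the store can't alias the loads. */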
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[1].ssa, offset);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
{
   /* These loads can't be combined: if index_base == 268435455, the addition
    * wraps around so that offset == 0 and the store at offset 0 may alias
    * them. They could be combined if nir_alu_instr::no_unsigned_wrap were set
    * on the addition (see DISABLED_ssbo_alias4). */
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
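   /* Worked example of the wrap: 268435455 * 16 + 16 = 0xfffffff0 + 0x10 =
    * 0x100000000, which truncates to 0 in 32 bits. */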
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
{
   /* TODO: try to combine these loads */
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
   nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
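   /* With no_unsigned_wrap set the addition can't wrap to 0, so the store at
    * offset 0 can never alias the loads and combining them would be safe. */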
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[1].ssa, offset);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
{
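   /* Different SSBO bindings may still refer to the same buffer, so the store
    * to binding 1 can alias the loads from binding 0. */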
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 1, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
{
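   /* Same as ssbo_alias5, but ACCESS_RESTRICT guarantees the bindings don't
    * alias, so the two loads of binding 0 can be combined across the store. */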
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
   create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
{
   /* TODO: implement type-based alias analysis so that these loads can be
    * combined. this is made a bit more difficult than simply using
    * nir_compare_derefs() because the vectorizer creates loads/stores with
    * casted derefs. The solution would probably be to keep multiple derefs for
    * an entry (one for each load/store combined into it). */
   glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};

   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   nir_ssa_def *index0 = nir_load_local_invocation_index(b);
   nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32);
   nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);

   create_shared_load(load_deref, 0x1);
   create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
   create_shared_load(load_deref, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, shared_alias1)
{
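   /* var0 and var1 are distinct shared variables, so the store to var1 can't
    * alias the loads of var0 and the two identical loads get combined. */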
   nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
   nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
   nir_deref_instr *load_deref = nir_build_deref_var(b, var0);

   create_shared_load(load_deref, 0x1);
   create_shared_store(nir_build_deref_var(b, var1), 0x2);
   create_shared_load(load_deref, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
{
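   /* The 64-bit offsets differ by 0x100000004 bytes, so the loads are nowhere
    * near adjacent; the values are presumably chosen so that truncating the
    * difference to 32 bits would make them look adjacent. */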
   create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x100000000), 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x200000004), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
{
   nir_ssa_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
   nir_ssa_def *first = nir_imul_imm(b, index_base, 0x100000000);
   nir_ssa_def *second = nir_imul_imm(b, index_base, 0x200000000);
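   /* Both scaled offsets are multiples of 2^32, so their difference is zero in
    * the low 32 bits; this presumably guards against the offset analysis being
    * truncated to 32 bits. */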
   create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
{
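   /* 0xfffffffc + 4 only reaches offset 0 by wrapping around 2^32, so with
    * robust access enabled for SSBOs the pass must not merge these loads. */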
   create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}