util: use C99 declaration in the for-loop set_foreach() macro
[mesa.git] / src / compiler / nir / nir_split_vars.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "nir_vla.h"
28
29 #include "util/u_math.h"
30
31
32 struct split_var_state {
33 void *mem_ctx;
34
35 nir_shader *shader;
36 nir_function_impl *impl;
37
38 nir_variable *base_var;
39 };
40
41 struct field {
42 struct field *parent;
43
44 const struct glsl_type *type;
45
46 unsigned num_fields;
47 struct field *fields;
48
49 nir_variable *var;
50 };
51
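/* Wraps the given type in arrays matching the dimensions of array_type.  For
 * example, wrapping float in the shape of vec4[4] yields float[4].
 */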
52 static const struct glsl_type *
53 wrap_type_in_array(const struct glsl_type *type,
54 const struct glsl_type *array_type)
55 {
56 if (!glsl_type_is_array(array_type))
57 return type;
58
59 const struct glsl_type *elem_type =
60 wrap_type_in_array(type, glsl_get_array_element(array_type));
61 return glsl_array_type(elem_type, glsl_get_length(array_type));
62 }
63
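/* Returns the number of array (or matrix) levels wrapped around a vector or
 * scalar type, or -1 if the innermost type is not a vector or scalar.
 */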
64 static int
65 num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
66 {
67 int num_levels = 0;
68 while (true) {
69 if (glsl_type_is_array_or_matrix(type)) {
70 num_levels++;
71 type = glsl_get_array_element(type);
72 } else if (glsl_type_is_vector_or_scalar(type)) {
73 return num_levels;
74 } else {
75 /* Not an array of vectors */
76 return -1;
77 }
78 }
79 }
80
81 static void
82 init_field_for_type(struct field *field, struct field *parent,
83 const struct glsl_type *type,
84 const char *name,
85 struct split_var_state *state)
86 {
87 *field = (struct field) {
88 .parent = parent,
89 .type = type,
90 };
91
92 const struct glsl_type *struct_type = glsl_without_array(type);
93 if (glsl_type_is_struct(struct_type)) {
94 field->num_fields = glsl_get_length(struct_type);
95 field->fields = ralloc_array(state->mem_ctx, struct field,
96 field->num_fields);
97 for (unsigned i = 0; i < field->num_fields; i++) {
98 char *field_name = NULL;
99 if (name) {
100 field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
101 glsl_get_struct_elem_name(struct_type, i));
102 } else {
103 field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
104 glsl_get_type_name(struct_type),
105 glsl_get_struct_elem_name(struct_type, i));
106 }
107 init_field_for_type(&field->fields[i], field,
108 glsl_get_struct_field(struct_type, i),
109 field_name, state);
110 }
111 } else {
112 const struct glsl_type *var_type = type;
113 for (struct field *f = field->parent; f; f = f->parent)
114 var_type = wrap_type_in_array(var_type, f->type);
115
116 nir_variable_mode mode = state->base_var->data.mode;
117 if (mode == nir_var_local) {
118 field->var = nir_local_variable_create(state->impl, var_type, name);
119 } else {
120 field->var = nir_variable_create(state->shader, mode, var_type, name);
121 }
122 }
123 }
124
125 static bool
126 split_var_list_structs(nir_shader *shader,
127 nir_function_impl *impl,
128 struct exec_list *vars,
129 struct hash_table *var_field_map,
130 void *mem_ctx)
131 {
132 struct split_var_state state = {
133 .mem_ctx = mem_ctx,
134 .shader = shader,
135 .impl = impl,
136 };
137
138 struct exec_list split_vars;
139 exec_list_make_empty(&split_vars);
140
141 /* To avoid list confusion (we'll be adding things as we split variables),
142 * pull all of the variables we plan to split off of the list
143 */
144 nir_foreach_variable_safe(var, vars) {
145 if (!glsl_type_is_struct(glsl_without_array(var->type)))
146 continue;
147
148 exec_node_remove(&var->node);
149 exec_list_push_tail(&split_vars, &var->node);
150 }
151
152 nir_foreach_variable(var, &split_vars) {
153 state.base_var = var;
154
155 struct field *root_field = ralloc(mem_ctx, struct field);
156 init_field_for_type(root_field, NULL, var->type, var->name, &state);
157 _mesa_hash_table_insert(var_field_map, var, root_field);
158 }
159
160 return !exec_list_is_empty(&split_vars);
161 }
162
163 static void
164 split_struct_derefs_impl(nir_function_impl *impl,
165 struct hash_table *var_field_map,
166 nir_variable_mode modes,
167 void *mem_ctx)
168 {
169 nir_builder b;
170 nir_builder_init(&b, impl);
171
172 nir_foreach_block(block, impl) {
173 nir_foreach_instr_safe(instr, block) {
174 if (instr->type != nir_instr_type_deref)
175 continue;
176
177 nir_deref_instr *deref = nir_instr_as_deref(instr);
178 if (!(deref->mode & modes))
179 continue;
180
181 /* Clean up any dead derefs we find lying around. They may refer to
182 * variables we're planning to split.
183 */
184 if (nir_deref_instr_remove_if_unused(deref))
185 continue;
186
187 if (!glsl_type_is_vector_or_scalar(deref->type))
188 continue;
189
190 nir_variable *base_var = nir_deref_instr_get_variable(deref);
191 struct hash_entry *entry =
192 _mesa_hash_table_search(var_field_map, base_var);
193 if (!entry)
194 continue;
195
196 struct field *root_field = entry->data;
197
198 nir_deref_path path;
199 nir_deref_path_init(&path, deref, mem_ctx);
200
201 struct field *tail_field = root_field;
202 for (unsigned i = 0; path.path[i]; i++) {
203 if (path.path[i]->deref_type != nir_deref_type_struct)
204 continue;
205
206 assert(i > 0);
207 assert(glsl_type_is_struct(path.path[i - 1]->type));
208 assert(path.path[i - 1]->type ==
209 glsl_without_array(tail_field->type));
210
211 tail_field = &tail_field->fields[path.path[i]->strct.index];
212 }
213 nir_variable *split_var = tail_field->var;
214
215 nir_deref_instr *new_deref = NULL;
216 for (unsigned i = 0; path.path[i]; i++) {
217 nir_deref_instr *p = path.path[i];
218 b.cursor = nir_after_instr(&p->instr);
219
220 switch (p->deref_type) {
221 case nir_deref_type_var:
222 assert(new_deref == NULL);
223 new_deref = nir_build_deref_var(&b, split_var);
224 break;
225
226 case nir_deref_type_array:
227 case nir_deref_type_array_wildcard:
228 new_deref = nir_build_deref_follower(&b, new_deref, p);
229 break;
230
231 case nir_deref_type_struct:
232 /* Nothing to do; we're splitting structs */
233 break;
234
235 default:
236 unreachable("Invalid deref type in path");
237 }
238 }
239
240 assert(new_deref->type == deref->type);
241 nir_ssa_def_rewrite_uses(&deref->dest.ssa,
242 nir_src_for_ssa(&new_deref->dest.ssa));
243 nir_deref_instr_remove_if_unused(deref);
244 }
245 }
246 }
247
248 /** A pass for splitting structs into multiple variables
249 *
250 * This pass splits arrays of structs into multiple variables, one for each
251 * (possibly nested) structure member. After this pass completes, no
252 * variables of the given mode will contain a struct type.
253 */
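/* As a rough illustration (GLSL-style sketch, not literal pass output): a
 * local variable declared as
 *
 *    struct { vec4 color; float depth; } s[4];
 *
 * becomes two variables, "s_color" of type vec4[4] and "s_depth" of type
 * float[4], and every struct deref of "s" is rewritten to point at the
 * matching new variable.
 */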
254 bool
255 nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
256 {
257 void *mem_ctx = ralloc_context(NULL);
258 struct hash_table *var_field_map =
259 _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
260 _mesa_key_pointer_equal);
261
262 assert((modes & (nir_var_global | nir_var_local)) == modes);
263
264 bool has_global_splits = false;
265 if (modes & nir_var_global) {
266 has_global_splits = split_var_list_structs(shader, NULL,
267 &shader->globals,
268 var_field_map, mem_ctx);
269 }
270
271 bool progress = false;
272 nir_foreach_function(function, shader) {
273 if (!function->impl)
274 continue;
275
276 bool has_local_splits = false;
277 if (modes & nir_var_local) {
278 has_local_splits = split_var_list_structs(shader, function->impl,
279 &function->impl->locals,
280 var_field_map, mem_ctx);
281 }
282
283 if (has_global_splits || has_local_splits) {
284 split_struct_derefs_impl(function->impl, var_field_map,
285 modes, mem_ctx);
286
287 nir_metadata_preserve(function->impl, nir_metadata_block_index |
288 nir_metadata_dominance);
289 progress = true;
290 }
291 }
292
293 ralloc_free(mem_ctx);
294
295 return progress;
296 }
297
298 struct array_level_info {
299 unsigned array_len;
300 bool split;
301 };
302
303 struct array_split {
304 /* Only set if this is the tail end of the splitting */
305 nir_variable *var;
306
307 unsigned num_splits;
308 struct array_split *splits;
309 };
310
311 struct array_var_info {
312 nir_variable *base_var;
313
314 const struct glsl_type *split_var_type;
315
316 bool split_var;
317 struct array_split root_split;
318
319 unsigned num_levels;
320 struct array_level_info levels[0];
321 };
322
323 static bool
324 init_var_list_array_infos(struct exec_list *vars,
325 struct hash_table *var_info_map,
326 void *mem_ctx)
327 {
328 bool has_array = false;
329
330 nir_foreach_variable(var, vars) {
331 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
332 if (num_levels <= 0)
333 continue;
334
335 struct array_var_info *info =
336 rzalloc_size(mem_ctx, sizeof(*info) +
337 num_levels * sizeof(info->levels[0]));
338
339 info->base_var = var;
340 info->num_levels = num_levels;
341
342 const struct glsl_type *type = var->type;
343 for (int i = 0; i < num_levels; i++) {
344 info->levels[i].array_len = glsl_get_length(type);
345 type = glsl_get_array_element(type);
346
347 /* All levels start out as split */
348 info->levels[i].split = true;
349 }
350
351 _mesa_hash_table_insert(var_info_map, var, info);
352 has_array = true;
353 }
354
355 return has_array;
356 }
357
358 static struct array_var_info *
359 get_array_var_info(nir_variable *var,
360 struct hash_table *var_info_map)
361 {
362 struct hash_entry *entry =
363 _mesa_hash_table_search(var_info_map, var);
364 return entry ? entry->data : NULL;
365 }
366
367 static struct array_var_info *
368 get_array_deref_info(nir_deref_instr *deref,
369 struct hash_table *var_info_map,
370 nir_variable_mode modes)
371 {
372 if (!(deref->mode & modes))
373 return NULL;
374
375 return get_array_var_info(nir_deref_instr_get_variable(deref),
376 var_info_map);
377 }
378
379 static void
380 mark_array_deref_used(nir_deref_instr *deref,
381 struct hash_table *var_info_map,
382 nir_variable_mode modes,
383 void *mem_ctx)
384 {
385 struct array_var_info *info =
386 get_array_deref_info(deref, var_info_map, modes);
387 if (!info)
388 return;
389
390 nir_deref_path path;
391 nir_deref_path_init(&path, deref, mem_ctx);
392
393 /* Walk the path and look for indirects. If we have an array deref with an
394 * indirect, mark the given level as not being split.
395 */
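/* For example, with a deref equivalent to foo[i][2] where i is not constant,
 * the outer level is marked as not split while the constant-indexed inner
 * level stays split.
 */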
396 for (unsigned i = 0; i < info->num_levels; i++) {
397 nir_deref_instr *p = path.path[i + 1];
398 if (p->deref_type == nir_deref_type_array &&
399 !nir_src_is_const(p->arr.index))
400 info->levels[i].split = false;
401 }
402 }
403
404 static void
405 mark_array_usage_impl(nir_function_impl *impl,
406 struct hash_table *var_info_map,
407 nir_variable_mode modes,
408 void *mem_ctx)
409 {
410 nir_foreach_block(block, impl) {
411 nir_foreach_instr(instr, block) {
412 if (instr->type != nir_instr_type_intrinsic)
413 continue;
414
415 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
416 switch (intrin->intrinsic) {
417 case nir_intrinsic_copy_deref:
418 mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
419 var_info_map, modes, mem_ctx);
420 /* Fall Through */
421
422 case nir_intrinsic_load_deref:
423 case nir_intrinsic_store_deref:
424 mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
425 var_info_map, modes, mem_ctx);
426 break;
427
428 default:
429 break;
430 }
431 }
432 }
433 }
434
435 static void
436 create_split_array_vars(struct array_var_info *var_info,
437 unsigned level,
438 struct array_split *split,
439 const char *name,
440 nir_shader *shader,
441 nir_function_impl *impl,
442 void *mem_ctx)
443 {
444 while (level < var_info->num_levels && !var_info->levels[level].split) {
445 name = ralloc_asprintf(mem_ctx, "%s[*]", name);
446 level++;
447 }
448
449 if (level == var_info->num_levels) {
450 /* We add parens to the variable name so it looks like "(foo[2][*])" so
451 * that further derefs will look like "(foo[2][*])[ssa_6]"
452 */
453 name = ralloc_asprintf(mem_ctx, "(%s)", name);
454
455 nir_variable_mode mode = var_info->base_var->data.mode;
456 if (mode == nir_var_local) {
457 split->var = nir_local_variable_create(impl,
458 var_info->split_var_type, name);
459 } else {
460 split->var = nir_variable_create(shader, mode,
461 var_info->split_var_type, name);
462 }
463 } else {
464 assert(var_info->levels[level].split);
465 split->num_splits = var_info->levels[level].array_len;
466 split->splits = rzalloc_array(mem_ctx, struct array_split,
467 split->num_splits);
468 for (unsigned i = 0; i < split->num_splits; i++) {
469 create_split_array_vars(var_info, level + 1, &split->splits[i],
470 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
471 shader, impl, mem_ctx);
472 }
473 }
474 }
475
476 static bool
477 split_var_list_arrays(nir_shader *shader,
478 nir_function_impl *impl,
479 struct exec_list *vars,
480 struct hash_table *var_info_map,
481 void *mem_ctx)
482 {
483 struct exec_list split_vars;
484 exec_list_make_empty(&split_vars);
485
486 nir_foreach_variable_safe(var, vars) {
487 struct array_var_info *info = get_array_var_info(var, var_info_map);
488 if (!info)
489 continue;
490
491 bool has_split = false;
492 const struct glsl_type *split_type =
493 glsl_without_array_or_matrix(var->type);
494 for (int i = info->num_levels - 1; i >= 0; i--) {
495 if (info->levels[i].split) {
496 has_split = true;
497 continue;
498 }
499
500 /* If the original type was a matrix type, we'd like to keep that so
501 * we don't convert matrices into arrays.
502 */
503 if (i == info->num_levels - 1 &&
504 glsl_type_is_matrix(glsl_without_array(var->type))) {
505 split_type = glsl_matrix_type(glsl_get_base_type(split_type),
506 glsl_get_components(split_type),
507 info->levels[i].array_len);
508 } else {
509 split_type = glsl_array_type(split_type, info->levels[i].array_len);
510 }
511 }
512
513 if (has_split) {
514 info->split_var_type = split_type;
515 /* To avoid list confusion (we'll be adding things as we split
516 * variables), pull all of the variables we plan to split off of the
517 * main variable list.
518 */
519 exec_node_remove(&var->node);
520 exec_list_push_tail(&split_vars, &var->node);
521 } else {
522 assert(split_type == var->type);
523 /* If we're not modifying this variable, delete the info so we skip
524 * it faster in later passes.
525 */
526 _mesa_hash_table_remove_key(var_info_map, var);
527 }
528 }
529
530 nir_foreach_variable(var, &split_vars) {
531 struct array_var_info *info = get_array_var_info(var, var_info_map);
532 create_split_array_vars(info, 0, &info->root_split, var->name,
533 shader, impl, mem_ctx);
534 }
535
536 return !exec_list_is_empty(&split_vars);
537 }
538
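/* Returns true if the deref path contains an array wildcard at a level that
 * is being split.  Copies involving such wildcards are rewritten by
 * emit_split_copies() below.
 */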
539 static bool
540 deref_has_split_wildcard(nir_deref_path *path,
541 struct array_var_info *info)
542 {
543 if (info == NULL)
544 return false;
545
546 assert(path->path[0]->var == info->base_var);
547 for (unsigned i = 0; i < info->num_levels; i++) {
548 if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
549 info->levels[i].split)
550 return true;
551 }
552
553 return false;
554 }
555
556 static bool
557 array_path_is_out_of_bounds(nir_deref_path *path,
558 struct array_var_info *info)
559 {
560 if (info == NULL)
561 return false;
562
563 assert(path->path[0]->var == info->base_var);
564 for (unsigned i = 0; i < info->num_levels; i++) {
565 nir_deref_instr *p = path->path[i + 1];
566 if (p->deref_type == nir_deref_type_array_wildcard)
567 continue;
568
569 if (nir_src_is_const(p->arr.index) &&
570 nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
571 return true;
572 }
573
574 return false;
575 }
576
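/* Emits the copies needed to replace a copy_deref whose source or destination
 * contains wildcards over split array levels.  Non-wildcard path components
 * are followed directly; at a wildcard, if either side is split at that level
 * the copy is unrolled into per-element copies, otherwise a wildcard copy is
 * emitted and the recursion continues.
 */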
577 static void
578 emit_split_copies(nir_builder *b,
579 struct array_var_info *dst_info, nir_deref_path *dst_path,
580 unsigned dst_level, nir_deref_instr *dst,
581 struct array_var_info *src_info, nir_deref_path *src_path,
582 unsigned src_level, nir_deref_instr *src)
583 {
584 nir_deref_instr *dst_p, *src_p;
585
586 while ((dst_p = dst_path->path[dst_level + 1])) {
587 if (dst_p->deref_type == nir_deref_type_array_wildcard)
588 break;
589
590 dst = nir_build_deref_follower(b, dst, dst_p);
591 dst_level++;
592 }
593
594 while ((src_p = src_path->path[src_level + 1])) {
595 if (src_p->deref_type == nir_deref_type_array_wildcard)
596 break;
597
598 src = nir_build_deref_follower(b, src, src_p);
599 src_level++;
600 }
601
602 if (src_p == NULL || dst_p == NULL) {
603 assert(src_p == NULL && dst_p == NULL);
604 nir_copy_deref(b, dst, src);
605 } else {
606 assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
607 src_p->deref_type == nir_deref_type_array_wildcard);
608
609 if ((dst_info && dst_info->levels[dst_level].split) ||
610 (src_info && src_info->levels[src_level].split)) {
611 /* There are no indirects at this level on at least one of the source and
612 * the destination, so we are lowering it.
613 */
614 assert(glsl_get_length(dst_path->path[dst_level]->type) ==
615 glsl_get_length(src_path->path[src_level]->type));
616 unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
617 for (unsigned i = 0; i < len; i++) {
618 nir_ssa_def *idx = nir_imm_int(b, i);
619 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
620 nir_build_deref_array(b, dst, idx),
621 src_info, src_path, src_level + 1,
622 nir_build_deref_array(b, src, idx));
623 }
624 } else {
625 /* Neither side is being split so we just keep going */
626 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
627 nir_build_deref_array_wildcard(b, dst),
628 src_info, src_path, src_level + 1,
629 nir_build_deref_array_wildcard(b, src));
630 }
631 }
632 }
633
634 static void
635 split_array_copies_impl(nir_function_impl *impl,
636 struct hash_table *var_info_map,
637 nir_variable_mode modes,
638 void *mem_ctx)
639 {
640 nir_builder b;
641 nir_builder_init(&b, impl);
642
643 nir_foreach_block(block, impl) {
644 nir_foreach_instr_safe(instr, block) {
645 if (instr->type != nir_instr_type_intrinsic)
646 continue;
647
648 nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
649 if (copy->intrinsic != nir_intrinsic_copy_deref)
650 continue;
651
652 nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
653 nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
654
655 struct array_var_info *dst_info =
656 get_array_deref_info(dst_deref, var_info_map, modes);
657 struct array_var_info *src_info =
658 get_array_deref_info(src_deref, var_info_map, modes);
659
660 if (!src_info && !dst_info)
661 continue;
662
663 nir_deref_path dst_path, src_path;
664 nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
665 nir_deref_path_init(&src_path, src_deref, mem_ctx);
666
667 if (!deref_has_split_wildcard(&dst_path, dst_info) &&
668 !deref_has_split_wildcard(&src_path, src_info))
669 continue;
670
671 b.cursor = nir_instr_remove(&copy->instr);
672
673 emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
674 src_info, &src_path, 0, src_path.path[0]);
675 }
676 }
677 }
678
679 static void
680 split_array_access_impl(nir_function_impl *impl,
681 struct hash_table *var_info_map,
682 nir_variable_mode modes,
683 void *mem_ctx)
684 {
685 nir_builder b;
686 nir_builder_init(&b, impl);
687
688 nir_foreach_block(block, impl) {
689 nir_foreach_instr_safe(instr, block) {
690 if (instr->type == nir_instr_type_deref) {
691 /* Clean up any dead derefs we find lying around. They may refer
692 * to variables we're planning to split.
693 */
694 nir_deref_instr *deref = nir_instr_as_deref(instr);
695 if (deref->mode & modes)
696 nir_deref_instr_remove_if_unused(deref);
697 continue;
698 }
699
700 if (instr->type != nir_instr_type_intrinsic)
701 continue;
702
703 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
704 if (intrin->intrinsic != nir_intrinsic_load_deref &&
705 intrin->intrinsic != nir_intrinsic_store_deref &&
706 intrin->intrinsic != nir_intrinsic_copy_deref)
707 continue;
708
709 const unsigned num_derefs =
710 intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
711
712 for (unsigned d = 0; d < num_derefs; d++) {
713 nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
714
715 struct array_var_info *info =
716 get_array_deref_info(deref, var_info_map, modes);
717 if (!info)
718 continue;
719
720 nir_deref_path path;
721 nir_deref_path_init(&path, deref, mem_ctx);
722
723 b.cursor = nir_before_instr(&intrin->instr);
724
725 if (array_path_is_out_of_bounds(&path, info)) {
726 /* If one of the derefs is out-of-bounds, we just delete the
727 * instruction. If a destination is out of bounds, then it may
728 * have been in-bounds prior to shrinking so we don't want to
729 * accidentally stomp something. However, we've already proven
730 * that it will never be read so it's safe to delete. If a
731 * source is out of bounds then it is loading random garbage.
732 * For loads, we replace their uses with an undef instruction
733 * and for copies we just delete the copy since it was writing
734 * undefined garbage anyway and we may as well leave the random
735 * garbage in the destination alone.
736 */
737 if (intrin->intrinsic == nir_intrinsic_load_deref) {
738 nir_ssa_def *u =
739 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
740 intrin->dest.ssa.bit_size);
741 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
742 nir_src_for_ssa(u));
743 }
744 nir_instr_remove(&intrin->instr);
745 for (unsigned i = 0; i < num_derefs; i++)
746 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
747 break;
748 }
749
750 struct array_split *split = &info->root_split;
751 for (unsigned i = 0; i < info->num_levels; i++) {
752 if (info->levels[i].split) {
753 nir_deref_instr *p = path.path[i + 1];
754 unsigned index = nir_src_as_uint(p->arr.index);
755 assert(index < info->levels[i].array_len);
756 split = &split->splits[index];
757 }
758 }
759 assert(!split->splits && split->var);
760
761 nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
762 for (unsigned i = 0; i < info->num_levels; i++) {
763 if (!info->levels[i].split) {
764 new_deref = nir_build_deref_follower(&b, new_deref,
765 path.path[i + 1]);
766 }
767 }
768 assert(new_deref->type == deref->type);
769
770 /* Rewrite the deref source to point to the split one */
771 nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
772 nir_src_for_ssa(&new_deref->dest.ssa));
773 nir_deref_instr_remove_if_unused(deref);
774 }
775 }
776 }
777 }
778
779 /** A pass for splitting arrays of vectors into multiple variables
780 *
781 * This pass looks at arrays (possibly multiple levels) of vectors (not
782 * structures or other types) and tries to split them into piles of variables,
783 * one for each array element. The heuristic used is simple: If a given array
784 * level is never used with an indirect, that array level will get split.
785 *
786 * This pass could probably handle structures easily enough, but making a
787 * pass that could see through an array of structures of arrays would be
788 * difficult, so it's best to just run nir_split_struct_vars first.
789 */
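/* As a rough illustration: a local "vec4 foo[3]" that is only ever indexed
 * with constants is replaced by three vec4 variables "(foo[0])", "(foo[1])",
 * and "(foo[2])".  Any level that is ever indexed indirectly is kept as an
 * array, using the "[*]" naming described in create_split_array_vars().
 */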
790 bool
791 nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
792 {
793 void *mem_ctx = ralloc_context(NULL);
794 struct hash_table *var_info_map =
795 _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
796 _mesa_key_pointer_equal);
797
798 assert((modes & (nir_var_global | nir_var_local)) == modes);
799
800 bool has_global_array = false;
801 if (modes & nir_var_global) {
802 has_global_array = init_var_list_array_infos(&shader->globals,
803 var_info_map, mem_ctx);
804 }
805
806 bool has_any_array = false;
807 nir_foreach_function(function, shader) {
808 if (!function->impl)
809 continue;
810
811 bool has_local_array = false;
812 if (modes & nir_var_local) {
813 has_local_array = init_var_list_array_infos(&function->impl->locals,
814 var_info_map, mem_ctx);
815 }
816
817 if (has_global_array || has_local_array) {
818 has_any_array = true;
819 mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
820 }
821 }
822
823 /* If we failed to find any arrays of vectors, bail early. */
824 if (!has_any_array) {
825 ralloc_free(mem_ctx);
826 return false;
827 }
828
829 bool has_global_splits = false;
830 if (modes & nir_var_global) {
831 has_global_splits = split_var_list_arrays(shader, NULL,
832 &shader->globals,
833 var_info_map, mem_ctx);
834 }
835
836 bool progress = false;
837 nir_foreach_function(function, shader) {
838 if (!function->impl)
839 continue;
840
841 bool has_local_splits = false;
842 if (modes & nir_var_local) {
843 has_local_splits = split_var_list_arrays(shader, function->impl,
844 &function->impl->locals,
845 var_info_map, mem_ctx);
846 }
847
848 if (has_global_splits || has_local_splits) {
849 split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
850 split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
851
852 nir_metadata_preserve(function->impl, nir_metadata_block_index |
853 nir_metadata_dominance);
854 progress = true;
855 }
856 }
857
858 ralloc_free(mem_ctx);
859
860 return progress;
861 }
862
863 struct array_level_usage {
864 unsigned array_len;
865
866 /* The value UINT_MAX will be used to indicate an indirect */
867 unsigned max_read;
868 unsigned max_written;
869
870 /* True if there is a copy that isn't to/from a shrinkable array */
871 bool has_external_copy;
872 struct set *levels_copied;
873 };
874
875 struct vec_var_usage {
876 /* Convenience set of all components this variable has */
877 nir_component_mask_t all_comps;
878
879 nir_component_mask_t comps_read;
880 nir_component_mask_t comps_written;
881
882 nir_component_mask_t comps_kept;
883
884 /* True if there is a copy that isn't to/from a shrinkable vector */
885 bool has_external_copy;
886 struct set *vars_copied;
887
888 unsigned num_levels;
889 struct array_level_usage levels[0];
890 };
891
892 static struct vec_var_usage *
893 get_vec_var_usage(nir_variable *var,
894 struct hash_table *var_usage_map,
895 bool add_usage_entry, void *mem_ctx)
896 {
897 struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
898 if (entry)
899 return entry->data;
900
901 if (!add_usage_entry)
902 return NULL;
903
904 /* Check to make sure that we are working with an array of vectors. We
905 * don't bother to shrink single vectors because we figure that we can
906 * clean it up better with SSA than by inserting piles of vecN instructions
907 * to compact results.
908 */
909 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
910 if (num_levels < 1)
911 return NULL; /* Not an array of vectors */
912
913 struct vec_var_usage *usage =
914 rzalloc_size(mem_ctx, sizeof(*usage) +
915 num_levels * sizeof(usage->levels[0]));
916
917 usage->num_levels = num_levels;
918 const struct glsl_type *type = var->type;
919 for (unsigned i = 0; i < num_levels; i++) {
920 usage->levels[i].array_len = glsl_get_length(type);
921 type = glsl_get_array_element(type);
922 }
923 assert(glsl_type_is_vector_or_scalar(type));
924
925 usage->all_comps = (1 << glsl_get_components(type)) - 1;
926
927 _mesa_hash_table_insert(var_usage_map, var, usage);
928
929 return usage;
930 }
931
932 static struct vec_var_usage *
933 get_vec_deref_usage(nir_deref_instr *deref,
934 struct hash_table *var_usage_map,
935 nir_variable_mode modes,
936 bool add_usage_entry, void *mem_ctx)
937 {
938 if (!(deref->mode & modes))
939 return NULL;
940
941 return get_vec_var_usage(nir_deref_instr_get_variable(deref),
942 var_usage_map, add_usage_entry, mem_ctx);
943 }
944
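/* Records which components and array indices are touched by a load, store, or
 * copy through the given deref.  copy_deref, if non-NULL, is the other side
 * of a copy; when both sides are tracked, the copied variables and any
 * wildcard levels are linked so that shrink_vec_var_list() can keep their
 * component masks and lengths in sync.
 */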
945 static void
946 mark_deref_used(nir_deref_instr *deref,
947 nir_component_mask_t comps_read,
948 nir_component_mask_t comps_written,
949 nir_deref_instr *copy_deref,
950 struct hash_table *var_usage_map,
951 nir_variable_mode modes,
952 void *mem_ctx)
953 {
954 if (!(deref->mode & modes))
955 return;
956
957 nir_variable *var = nir_deref_instr_get_variable(deref);
958
959 struct vec_var_usage *usage =
960 get_vec_var_usage(var, var_usage_map, true, mem_ctx);
961 if (!usage)
962 return;
963
964 usage->comps_read |= comps_read & usage->all_comps;
965 usage->comps_written |= comps_written & usage->all_comps;
966
967 struct vec_var_usage *copy_usage = NULL;
968 if (copy_deref) {
969 copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
970 true, mem_ctx);
971 if (copy_usage) {
972 if (usage->vars_copied == NULL) {
973 usage->vars_copied = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
974 _mesa_key_pointer_equal);
975 }
976 _mesa_set_add(usage->vars_copied, copy_usage);
977 } else {
978 usage->has_external_copy = true;
979 }
980 }
981
982 nir_deref_path path;
983 nir_deref_path_init(&path, deref, mem_ctx);
984
985 nir_deref_path copy_path;
986 if (copy_usage)
987 nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
988
989 unsigned copy_i = 0;
990 for (unsigned i = 0; i < usage->num_levels; i++) {
991 struct array_level_usage *level = &usage->levels[i];
992 nir_deref_instr *deref = path.path[i + 1];
993 assert(deref->deref_type == nir_deref_type_array ||
994 deref->deref_type == nir_deref_type_array_wildcard);
995
996 unsigned max_used;
997 if (deref->deref_type == nir_deref_type_array) {
998 max_used = nir_src_is_const(deref->arr.index) ?
999 nir_src_as_uint(deref->arr.index) : UINT_MAX;
1000 } else {
1001 /* For wildcards, we read or wrote the whole thing. */
1002 assert(deref->deref_type == nir_deref_type_array_wildcard);
1003 max_used = level->array_len - 1;
1004
1005 if (copy_usage) {
1006 /* Match each wildcard level with the level on copy_usage */
1007 for (; copy_path.path[copy_i + 1]; copy_i++) {
1008 if (copy_path.path[copy_i + 1]->deref_type ==
1009 nir_deref_type_array_wildcard)
1010 break;
1011 }
1012 struct array_level_usage *copy_level =
1013 &copy_usage->levels[copy_i++];
1014
1015 if (level->levels_copied == NULL) {
1016 level->levels_copied =
1017 _mesa_set_create(mem_ctx, _mesa_hash_pointer,
1018 _mesa_key_pointer_equal);
1019 }
1020 _mesa_set_add(level->levels_copied, copy_level);
1021 } else {
1022 /* We have a wildcard and it comes from a variable we aren't
1023 * tracking; flag it and we'll know to not shorten this array.
1024 */
1025 level->has_external_copy = true;
1026 }
1027 }
1028
1029 if (comps_written)
1030 level->max_written = MAX2(level->max_written, max_used);
1031 if (comps_read)
1032 level->max_read = MAX2(level->max_read, max_used);
1033 }
1034 }
1035
1036 static bool
1037 src_is_load_deref(nir_src src, nir_src deref_src)
1038 {
1039 assert(src.is_ssa);
1040 assert(deref_src.is_ssa);
1041
1042 if (src.ssa->parent_instr->type != nir_instr_type_intrinsic)
1043 return false;
1044
1045 nir_intrinsic_instr *load = nir_instr_as_intrinsic(src.ssa->parent_instr);
1046 if (load->intrinsic != nir_intrinsic_load_deref)
1047 return false;
1048
1049 assert(load->src[0].is_ssa);
1050
1051 return load->src[0].ssa == deref_src.ssa;
1052 }
1053
1054 /* Returns all non-self-referential components of a store instruction. A
1055 * component is self-referential if it comes from the same component of a load
1056 * instruction on the same deref. If the only data in a particular component
1057 * of a variable came directly from that component then it's undefined. The
1058 * only way to get defined data into a component of a variable is for it to
1059 * get written there by something outside or from a different component.
1060 *
1061 * This is a fairly common pattern in shaders that come from either GLSL IR or
1062 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1063 * load-vec-store.
1064 */
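/* As a rough sketch, a GLSL write such as "foo.xz = v" typically reaches NIR
 * as something like
 *
 *    vec4 tmp = load_deref(foo);
 *    store_deref(foo, vec4(v.x, tmp.y, v.y, tmp.w), writemask=0xf);
 *
 * Components 1 and 3 only copy foo back into itself, so this function reports
 * only components 0 and 2 as actually written.
 */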
1065 static nir_component_mask_t
1066 get_non_self_referential_store_comps(nir_intrinsic_instr *store)
1067 {
1068 nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1069
1070 assert(store->src[1].is_ssa);
1071 nir_instr *src_instr = store->src[1].ssa->parent_instr;
1072 if (src_instr->type != nir_instr_type_alu)
1073 return comps;
1074
1075 nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1076
1077 if (src_alu->op == nir_op_imov ||
1078 src_alu->op == nir_op_fmov) {
1079 /* If it's just a swizzle of a load from the same deref, discount any
1080 * channels that don't move in the swizzle.
1081 */
1082 if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1083 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1084 if (src_alu->src[0].swizzle[i] == i)
1085 comps &= ~(1u << i);
1086 }
1087 }
1088 } else if (src_alu->op == nir_op_vec2 ||
1089 src_alu->op == nir_op_vec3 ||
1090 src_alu->op == nir_op_vec4) {
1091 /* If it's a vec, discount any channels that are just loads from the
1092 * same deref put in the same spot.
1093 */
1094 for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1095 if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1096 src_alu->src[i].swizzle[0] == i)
1097 comps &= ~(1u << i);
1098 }
1099 }
1100
1101 return comps;
1102 }
1103
1104 static void
1105 find_used_components_impl(nir_function_impl *impl,
1106 struct hash_table *var_usage_map,
1107 nir_variable_mode modes,
1108 void *mem_ctx)
1109 {
1110 nir_foreach_block(block, impl) {
1111 nir_foreach_instr(instr, block) {
1112 if (instr->type != nir_instr_type_intrinsic)
1113 continue;
1114
1115 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1116 switch (intrin->intrinsic) {
1117 case nir_intrinsic_load_deref:
1118 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1119 nir_ssa_def_components_read(&intrin->dest.ssa), 0,
1120 NULL, var_usage_map, modes, mem_ctx);
1121 break;
1122
1123 case nir_intrinsic_store_deref:
1124 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1125 0, get_non_self_referential_store_comps(intrin),
1126 NULL, var_usage_map, modes, mem_ctx);
1127 break;
1128
1129 case nir_intrinsic_copy_deref: {
1130 /* Just mark everything used for copies. */
1131 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1132 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1133 mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1134 mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1135 break;
1136 }
1137
1138 default:
1139 break;
1140 }
1141 }
1142 }
1143 }
1144
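/* Shrinks the variables in the given list based on the usage information
 * gathered in var_usage_map: unused components are dropped, array levels are
 * trimmed, and variables with no live components are removed entirely.
 * Returns true if any variable was changed.
 */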
1145 static bool
1146 shrink_vec_var_list(struct exec_list *vars,
1147 struct hash_table *var_usage_map)
1148 {
1149 /* Initialize the components kept field of each variable. This is the
1150 * AND of the components written and components read. If a component is
1151 * written but never read, it's dead. If it is read but never written,
1152 * then all values read are undefined garbage and we may as well not read
1153 * them.
1154 *
1155 * The same logic applies to the array length. We make the array length
1156 * the minimum required length between read and write and plan to
1157 * discard any OOB access. The one exception here is indirect writes
1158 * because we don't know where they will land and we can't shrink an array
1159 * with indirect writes because previously in-bounds writes may become
1160 * out-of-bounds and have undefined behavior.
1161 *
1162 * Also, if we have a copy to/from something we can't shrink, we need to
1163 * leave the components and array_len of any wildcards alone.
1164 */
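/* For example, a vec4 where only .xy is both written and read keeps just
 * those two components, and an array whose highest constant index written
 * and read is 3 is trimmed to a length of 4 (assuming it was declared
 * larger).
 */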
1165 nir_foreach_variable(var, vars) {
1166 struct vec_var_usage *usage =
1167 get_vec_var_usage(var, var_usage_map, false, NULL);
1168 if (!usage)
1169 continue;
1170
1171 assert(usage->comps_kept == 0);
1172 if (usage->has_external_copy)
1173 usage->comps_kept = usage->all_comps;
1174 else
1175 usage->comps_kept = usage->comps_read & usage->comps_written;
1176
1177 for (unsigned i = 0; i < usage->num_levels; i++) {
1178 struct array_level_usage *level = &usage->levels[i];
1179 assert(level->array_len > 0);
1180
1181 if (level->max_written == UINT_MAX || level->has_external_copy)
1182 continue; /* Can't shrink */
1183
1184 unsigned max_used = MIN2(level->max_read, level->max_written);
1185 level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1186 }
1187 }
1188
1189 /* In order for variable copies to work, we have to have the same data type
1190 * on the source and the destination. In order to satisfy this, we run a
1191 * little fixed-point algorithm to transitively ensure that we get enough
1192 * components and array elements for this to hold for all copies.
1193 */
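/* For example, if variable a is copied into variable b and a keeps only .x
 * while b keeps .xyz, both end up keeping .xyz once this loop converges.
 */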
1194 bool fp_progress;
1195 do {
1196 fp_progress = false;
1197 nir_foreach_variable(var, vars) {
1198 struct vec_var_usage *var_usage =
1199 get_vec_var_usage(var, var_usage_map, false, NULL);
1200 if (!var_usage || !var_usage->vars_copied)
1201 continue;
1202
1203 set_foreach(var_usage->vars_copied, copy_entry) {
1204 struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1205 if (copy_usage->comps_kept != var_usage->comps_kept) {
1206 nir_component_mask_t comps_kept =
1207 (var_usage->comps_kept | copy_usage->comps_kept);
1208 var_usage->comps_kept = comps_kept;
1209 copy_usage->comps_kept = comps_kept;
1210 fp_progress = true;
1211 }
1212 }
1213
1214 for (unsigned i = 0; i < var_usage->num_levels; i++) {
1215 struct array_level_usage *var_level = &var_usage->levels[i];
1216 if (!var_level->levels_copied)
1217 continue;
1218
1219 set_foreach(var_level->levels_copied, copy_entry) {
1220 struct array_level_usage *copy_level = (void *)copy_entry->key;
1221 if (var_level->array_len != copy_level->array_len) {
1222 unsigned array_len =
1223 MAX2(var_level->array_len, copy_level->array_len);
1224 var_level->array_len = array_len;
1225 copy_level->array_len = array_len;
1226 fp_progress = true;
1227 }
1228 }
1229 }
1230 }
1231 } while (fp_progress);
1232
1233 bool vars_shrunk = false;
1234 nir_foreach_variable_safe(var, vars) {
1235 struct vec_var_usage *usage =
1236 get_vec_var_usage(var, var_usage_map, false, NULL);
1237 if (!usage)
1238 continue;
1239
1240 bool shrunk = false;
1241 const struct glsl_type *vec_type = var->type;
1242 for (unsigned i = 0; i < usage->num_levels; i++) {
1243 /* If we've reduced the array to zero elements at some level, just
1244 * set comps_kept to 0 and delete the variable.
1245 */
1246 if (usage->levels[i].array_len == 0) {
1247 usage->comps_kept = 0;
1248 break;
1249 }
1250
1251 assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1252 if (usage->levels[i].array_len < glsl_get_length(vec_type))
1253 shrunk = true;
1254 vec_type = glsl_get_array_element(vec_type);
1255 }
1256 assert(glsl_type_is_vector_or_scalar(vec_type));
1257
1258 assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1259 if (usage->comps_kept != usage->all_comps)
1260 shrunk = true;
1261
1262 if (usage->comps_kept == 0) {
1263 /* This variable is dead, remove it */
1264 vars_shrunk = true;
1265 exec_node_remove(&var->node);
1266 continue;
1267 }
1268
1269 if (!shrunk) {
1270 /* This variable doesn't need to be shrunk. Remove it from the
1271 * hash table so later steps will ignore it.
1272 */
1273 _mesa_hash_table_remove_key(var_usage_map, var);
1274 continue;
1275 }
1276
1277 /* Build the new var type */
1278 unsigned new_num_comps = util_bitcount(usage->comps_kept);
1279 const struct glsl_type *new_type =
1280 glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1281 for (int i = usage->num_levels - 1; i >= 0; i--) {
1282 assert(usage->levels[i].array_len > 0);
1283 /* If the original type was a matrix type, we'd like to keep that so
1284 * we don't convert matrices into arrays.
1285 */
1286 if (i == usage->num_levels - 1 &&
1287 glsl_type_is_matrix(glsl_without_array(var->type)) &&
1288 new_num_comps > 1 && usage->levels[i].array_len > 1) {
1289 new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1290 new_num_comps,
1291 usage->levels[i].array_len);
1292 } else {
1293 new_type = glsl_array_type(new_type, usage->levels[i].array_len);
1294 }
1295 }
1296 var->type = new_type;
1297
1298 vars_shrunk = true;
1299 }
1300
1301 return vars_shrunk;
1302 }
1303
1304 static bool
1305 vec_deref_is_oob(nir_deref_instr *deref,
1306 struct vec_var_usage *usage)
1307 {
1308 nir_deref_path path;
1309 nir_deref_path_init(&path, deref, NULL);
1310
1311 bool oob = false;
1312 for (unsigned i = 0; i < usage->num_levels; i++) {
1313 nir_deref_instr *p = path.path[i + 1];
1314 if (p->deref_type == nir_deref_type_array_wildcard)
1315 continue;
1316
1317 if (nir_src_is_const(p->arr.index) &&
1318 nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1319 oob = true;
1320 break;
1321 }
1322 }
1323
1324 nir_deref_path_finish(&path);
1325
1326 return oob;
1327 }
1328
1329 static bool
1330 vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1331 struct hash_table *var_usage_map,
1332 nir_variable_mode modes)
1333 {
1334 struct vec_var_usage *usage =
1335 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1336 if (!usage)
1337 return false;
1338
1339 return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1340 }
1341
1342 static void
1343 shrink_vec_var_access_impl(nir_function_impl *impl,
1344 struct hash_table *var_usage_map,
1345 nir_variable_mode modes)
1346 {
1347 nir_builder b;
1348 nir_builder_init(&b, impl);
1349
1350 nir_foreach_block(block, impl) {
1351 nir_foreach_instr_safe(instr, block) {
1352 switch (instr->type) {
1353 case nir_instr_type_deref: {
1354 nir_deref_instr *deref = nir_instr_as_deref(instr);
1355 if (!(deref->mode & modes))
1356 break;
1357
1358 /* Clean up any dead derefs we find lying around. They may refer
1359 * to variables we've deleted.
1360 */
1361 if (nir_deref_instr_remove_if_unused(deref))
1362 break;
1363
1364 /* Update the type in the deref to keep the types consistent as
1365 * you walk down the chain. We don't need to check if this is one
1366 * of the derefs we're shrinking because this is a no-op if it
1367 * isn't. The worst that could happen is that we accidentally fix
1368 * an invalid deref.
1369 */
1370 if (deref->deref_type == nir_deref_type_var) {
1371 deref->type = deref->var->type;
1372 } else if (deref->deref_type == nir_deref_type_array ||
1373 deref->deref_type == nir_deref_type_array_wildcard) {
1374 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1375 assert(glsl_type_is_array(parent->type) ||
1376 glsl_type_is_matrix(parent->type));
1377 deref->type = glsl_get_array_element(parent->type);
1378 }
1379 break;
1380 }
1381
1382 case nir_instr_type_intrinsic: {
1383 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1384
1385 /* If we have a copy whose source or destination has been deleted
1386 * because we determined the variable was dead, then we just
1387 * delete the copy instruction. If the source variable was dead
1388 * then it was writing undefined garbage anyway and if it's the
1389 * destination variable that's dead then the write isn't needed.
1390 */
1391 if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1392 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1393 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1394 if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1395 vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1396 nir_instr_remove(&intrin->instr);
1397 nir_deref_instr_remove_if_unused(dst);
1398 nir_deref_instr_remove_if_unused(src);
1399 }
1400 continue;
1401 }
1402
1403 if (intrin->intrinsic != nir_intrinsic_load_deref &&
1404 intrin->intrinsic != nir_intrinsic_store_deref)
1405 continue;
1406
1407 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1408 if (!(deref->mode & modes))
1409 continue;
1410
1411 struct vec_var_usage *usage =
1412 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1413 if (!usage)
1414 continue;
1415
1416 if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1417 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1418 nir_ssa_def *u =
1419 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
1420 intrin->dest.ssa.bit_size);
1421 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
1422 nir_src_for_ssa(u));
1423 }
1424 nir_instr_remove(&intrin->instr);
1425 nir_deref_instr_remove_if_unused(deref);
1426 continue;
1427 }
1428
1429 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1430 b.cursor = nir_after_instr(&intrin->instr);
1431
1432 nir_ssa_def *undef =
1433 nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
1434 nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1435 unsigned c = 0;
1436 for (unsigned i = 0; i < intrin->num_components; i++) {
1437 if (usage->comps_kept & (1u << i))
1438 vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
1439 else
1440 vec_srcs[i] = undef;
1441 }
1442 nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1443
1444 nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
1445 nir_src_for_ssa(vec),
1446 vec->parent_instr);
1447
1448 /* The SSA def is now only used by the swizzle. It's safe to
1449 * shrink the number of components.
1450 */
1451 assert(list_length(&intrin->dest.ssa.uses) == c);
1452 intrin->num_components = c;
1453 intrin->dest.ssa.num_components = c;
1454 } else {
1455 nir_component_mask_t write_mask =
1456 nir_intrinsic_write_mask(intrin);
1457
1458 unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1459 nir_component_mask_t new_write_mask = 0;
1460 unsigned c = 0;
1461 for (unsigned i = 0; i < intrin->num_components; i++) {
1462 if (usage->comps_kept & (1u << i)) {
1463 swizzle[c] = i;
1464 if (write_mask & (1u << i))
1465 new_write_mask |= 1u << c;
1466 c++;
1467 }
1468 }
1469
1470 b.cursor = nir_before_instr(&intrin->instr);
1471
1472 nir_ssa_def *swizzled =
1473 nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false);
1474
1475 /* Rewrite to use the compacted source */
1476 nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
1477 nir_src_for_ssa(swizzled));
1478 nir_intrinsic_set_write_mask(intrin, new_write_mask);
1479 intrin->num_components = c;
1480 }
1481 break;
1482 }
1483
1484 default:
1485 break;
1486 }
1487 }
1488 }
1489 }
1490
1491 static bool
1492 function_impl_has_vars_with_modes(nir_function_impl *impl,
1493 nir_variable_mode modes)
1494 {
1495 nir_shader *shader = impl->function->shader;
1496
1497 if ((modes & nir_var_global) && !exec_list_is_empty(&shader->globals))
1498 return true;
1499
1500 if ((modes & nir_var_local) && !exec_list_is_empty(&impl->locals))
1501 return true;
1502
1503 return false;
1504 }
1505
1506 /** Attempt to shrink arrays of vectors
1507 *
1508 * This pass looks at variables which contain a vector or an array (possibly
1509 * multiple dimensions) of vectors and attempts to lower to a smaller vector
1510 * or array. If the pass can prove that a component of a vector (or array of
1511 * vectors) is never really used, then that component will be removed.
1512 * Similarly, the pass attempts to shorten arrays based on what elements it
1513 * can prove are never read or never contain valid data.
1514 */
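/* As a rough illustration: if a shader declares "vec4 data[8]" but only ever
 * reads and writes data[0].xy through data[3].xy, with constant indices and
 * no copies involving variables this pass cannot see through, the variable is
 * rewritten as vec2[4] and the surviving loads and stores are compacted to
 * match.
 */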
1515 bool
1516 nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1517 {
1518 assert((modes & (nir_var_global | nir_var_local)) == modes);
1519
1520 void *mem_ctx = ralloc_context(NULL);
1521
1522 struct hash_table *var_usage_map =
1523 _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
1524 _mesa_key_pointer_equal);
1525
1526 bool has_vars_to_shrink = false;
1527 nir_foreach_function(function, shader) {
1528 if (!function->impl)
1529 continue;
1530
1531 /* Don't even bother crawling the IR if we don't have any variables.
1532 * Given that this pass deletes any unused variables, it's likely that
1533 * we will be in this scenario eventually.
1534 */
1535 if (function_impl_has_vars_with_modes(function->impl, modes)) {
1536 has_vars_to_shrink = true;
1537 find_used_components_impl(function->impl, var_usage_map,
1538 modes, mem_ctx);
1539 }
1540 }
1541 if (!has_vars_to_shrink) {
1542 ralloc_free(mem_ctx);
1543 return false;
1544 }
1545
1546 bool globals_shrunk = false;
1547 if (modes & nir_var_global)
1548 globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map);
1549
1550 bool progress = false;
1551 nir_foreach_function(function, shader) {
1552 if (!function->impl)
1553 continue;
1554
1555 bool locals_shrunk = false;
1556 if (modes & nir_var_local) {
1557 locals_shrunk = shrink_vec_var_list(&function->impl->locals,
1558 var_usage_map);
1559 }
1560
1561 if (globals_shrunk || locals_shrunk) {
1562 shrink_vec_var_access_impl(function->impl, var_usage_map, modes);
1563
1564 nir_metadata_preserve(function->impl, nir_metadata_block_index |
1565 nir_metadata_dominance);
1566 progress = true;
1567 }
1568 }
1569
1570 ralloc_free(mem_ctx);
1571
1572 return progress;
1573 }