nir: rename nir_var_function to nir_var_function_temp
[mesa.git] / src / compiler / nir / nir_split_vars.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "nir_vla.h"
28
29 #include "util/u_math.h"
30
31
32 struct split_var_state {
33 void *mem_ctx;
34
35 nir_shader *shader;
36 nir_function_impl *impl;
37
38 nir_variable *base_var;
39 };
40
41 struct field {
42 struct field *parent;
43
44 const struct glsl_type *type;
45
46 unsigned num_fields;
47 struct field *fields;
48
49 nir_variable *var;
50 };
51
52 static const struct glsl_type *
53 wrap_type_in_array(const struct glsl_type *type,
54 const struct glsl_type *array_type)
55 {
56 if (!glsl_type_is_array(array_type))
57 return type;
58
59 const struct glsl_type *elem_type =
60 wrap_type_in_array(type, glsl_get_array_element(array_type));
61 assert(glsl_get_explicit_stride(array_type) == 0);
62 return glsl_array_type(elem_type, glsl_get_length(array_type), 0);
63 }
64
65 static int
66 num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
67 {
68 int num_levels = 0;
69 while (true) {
70 if (glsl_type_is_array_or_matrix(type)) {
71 num_levels++;
72 type = glsl_get_array_element(type);
73 } else if (glsl_type_is_vector_or_scalar(type)) {
74 return num_levels;
75 } else {
76 /* Not an array of vectors */
77 return -1;
78 }
79 }
80 }
81
82 static void
83 init_field_for_type(struct field *field, struct field *parent,
84 const struct glsl_type *type,
85 const char *name,
86 struct split_var_state *state)
87 {
88 *field = (struct field) {
89 .parent = parent,
90 .type = type,
91 };
92
93 const struct glsl_type *struct_type = glsl_without_array(type);
94 if (glsl_type_is_struct(struct_type)) {
95 field->num_fields = glsl_get_length(struct_type);
96 field->fields = ralloc_array(state->mem_ctx, struct field,
97 field->num_fields);
98 for (unsigned i = 0; i < field->num_fields; i++) {
99 char *field_name = NULL;
100 if (name) {
101 field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
102 glsl_get_struct_elem_name(struct_type, i));
103 } else {
104 field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
105 glsl_get_type_name(struct_type),
106 glsl_get_struct_elem_name(struct_type, i));
107 }
108 init_field_for_type(&field->fields[i], field,
109 glsl_get_struct_field(struct_type, i),
110 field_name, state);
111 }
112 } else {
113 const struct glsl_type *var_type = type;
114 for (struct field *f = field->parent; f; f = f->parent)
115 var_type = wrap_type_in_array(var_type, f->type);
116
117 nir_variable_mode mode = state->base_var->data.mode;
118 if (mode == nir_var_function_temp) {
119 field->var = nir_local_variable_create(state->impl, var_type, name);
120 } else {
121 field->var = nir_variable_create(state->shader, mode, var_type, name);
122 }
123 }
124 }
125
126 static bool
127 split_var_list_structs(nir_shader *shader,
128 nir_function_impl *impl,
129 struct exec_list *vars,
130 struct hash_table *var_field_map,
131 void *mem_ctx)
132 {
133 struct split_var_state state = {
134 .mem_ctx = mem_ctx,
135 .shader = shader,
136 .impl = impl,
137 };
138
139 struct exec_list split_vars;
140 exec_list_make_empty(&split_vars);
141
142 /* To avoid list confusion (we'll be adding things as we split variables),
143 * pull all of the variables we plan to split off of the list
144 */
145 nir_foreach_variable_safe(var, vars) {
146 if (!glsl_type_is_struct(glsl_without_array(var->type)))
147 continue;
148
149 exec_node_remove(&var->node);
150 exec_list_push_tail(&split_vars, &var->node);
151 }
152
153 nir_foreach_variable(var, &split_vars) {
154 state.base_var = var;
155
156 struct field *root_field = ralloc(mem_ctx, struct field);
157 init_field_for_type(root_field, NULL, var->type, var->name, &state);
158 _mesa_hash_table_insert(var_field_map, var, root_field);
159 }
160
161 return !exec_list_is_empty(&split_vars);
162 }
163
164 static void
165 split_struct_derefs_impl(nir_function_impl *impl,
166 struct hash_table *var_field_map,
167 nir_variable_mode modes,
168 void *mem_ctx)
169 {
170 nir_builder b;
171 nir_builder_init(&b, impl);
172
173 nir_foreach_block(block, impl) {
174 nir_foreach_instr_safe(instr, block) {
175 if (instr->type != nir_instr_type_deref)
176 continue;
177
178 nir_deref_instr *deref = nir_instr_as_deref(instr);
179 if (!(deref->mode & modes))
180 continue;
181
182 /* Clean up any dead derefs we find lying around. They may refer to
183 * variables we're planning to split.
184 */
185 if (nir_deref_instr_remove_if_unused(deref))
186 continue;
187
188 if (!glsl_type_is_vector_or_scalar(deref->type))
189 continue;
190
191 nir_variable *base_var = nir_deref_instr_get_variable(deref);
192 struct hash_entry *entry =
193 _mesa_hash_table_search(var_field_map, base_var);
194 if (!entry)
195 continue;
196
197 struct field *root_field = entry->data;
198
199 nir_deref_path path;
200 nir_deref_path_init(&path, deref, mem_ctx);
201
202 struct field *tail_field = root_field;
203 for (unsigned i = 0; path.path[i]; i++) {
204 if (path.path[i]->deref_type != nir_deref_type_struct)
205 continue;
206
207 assert(i > 0);
208 assert(glsl_type_is_struct(path.path[i - 1]->type));
209 assert(path.path[i - 1]->type ==
210 glsl_without_array(tail_field->type));
211
212 tail_field = &tail_field->fields[path.path[i]->strct.index];
213 }
214 nir_variable *split_var = tail_field->var;
215
216 nir_deref_instr *new_deref = NULL;
217 for (unsigned i = 0; path.path[i]; i++) {
218 nir_deref_instr *p = path.path[i];
219 b.cursor = nir_after_instr(&p->instr);
220
221 switch (p->deref_type) {
222 case nir_deref_type_var:
223 assert(new_deref == NULL);
224 new_deref = nir_build_deref_var(&b, split_var);
225 break;
226
227 case nir_deref_type_array:
228 case nir_deref_type_array_wildcard:
229 new_deref = nir_build_deref_follower(&b, new_deref, p);
230 break;
231
232 case nir_deref_type_struct:
233 /* Nothing to do; we're splitting structs */
234 break;
235
236 default:
237 unreachable("Invalid deref type in path");
238 }
239 }
240
241 assert(new_deref->type == deref->type);
242 nir_ssa_def_rewrite_uses(&deref->dest.ssa,
243 nir_src_for_ssa(&new_deref->dest.ssa));
244 nir_deref_instr_remove_if_unused(deref);
245 }
246 }
247 }
248
249 /** A pass for splitting structs into multiple variables
250 *
251 * This pass splits structs (and arrays of structs) into multiple variables,
252 * one for each (possibly nested) struct member. After this pass completes, no
253 * variables of the given mode will contain a struct type.
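*
* As a rough illustration (the GLSL below is hypothetical, not taken from
* this file), a variable such as
*
*    struct S { vec4 color; float alpha; };
*    S s[4];
*
* would be replaced by two new variables roughly equivalent to
*
*    vec4 s_color[4];
*    float s_alpha[4];
*
* with an access like s[i].color rewritten to read s_color[i].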
254 */
255 bool
256 nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
257 {
258 void *mem_ctx = ralloc_context(NULL);
259 struct hash_table *var_field_map =
260 _mesa_pointer_hash_table_create(mem_ctx);
261
262 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
263
264 bool has_global_splits = false;
265 if (modes & nir_var_shader_temp) {
266 has_global_splits = split_var_list_structs(shader, NULL,
267 &shader->globals,
268 var_field_map, mem_ctx);
269 }
270
271 bool progress = false;
272 nir_foreach_function(function, shader) {
273 if (!function->impl)
274 continue;
275
276 bool has_local_splits = false;
277 if (modes & nir_var_function_temp) {
278 has_local_splits = split_var_list_structs(shader, function->impl,
279 &function->impl->locals,
280 var_field_map, mem_ctx);
281 }
282
283 if (has_global_splits || has_local_splits) {
284 split_struct_derefs_impl(function->impl, var_field_map,
285 modes, mem_ctx);
286
287 nir_metadata_preserve(function->impl, nir_metadata_block_index |
288 nir_metadata_dominance);
289 progress = true;
290 }
291 }
292
293 ralloc_free(mem_ctx);
294
295 return progress;
296 }
297
298 struct array_level_info {
299 unsigned array_len;
300 bool split;
301 };
302
303 struct array_split {
304 /* Only set if this is the tail end of the splitting */
305 nir_variable *var;
306
307 unsigned num_splits;
308 struct array_split *splits;
309 };
310
311 struct array_var_info {
312 nir_variable *base_var;
313
314 const struct glsl_type *split_var_type;
315
316 bool split_var;
317 struct array_split root_split;
318
319 unsigned num_levels;
320 struct array_level_info levels[0];
321 };
322
323 static bool
324 init_var_list_array_infos(struct exec_list *vars,
325 struct hash_table *var_info_map,
326 void *mem_ctx)
327 {
328 bool has_array = false;
329
330 nir_foreach_variable(var, vars) {
331 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
332 if (num_levels <= 0)
333 continue;
334
335 struct array_var_info *info =
336 rzalloc_size(mem_ctx, sizeof(*info) +
337 num_levels * sizeof(info->levels[0]));
338
339 info->base_var = var;
340 info->num_levels = num_levels;
341
342 const struct glsl_type *type = var->type;
343 for (int i = 0; i < num_levels; i++) {
344 assert(glsl_get_explicit_stride(type) == 0);
345 info->levels[i].array_len = glsl_get_length(type);
346 type = glsl_get_array_element(type);
347
348 /* All levels start out as split */
349 info->levels[i].split = true;
350 }
351
352 _mesa_hash_table_insert(var_info_map, var, info);
353 has_array = true;
354 }
355
356 return has_array;
357 }
358
359 static struct array_var_info *
360 get_array_var_info(nir_variable *var,
361 struct hash_table *var_info_map)
362 {
363 struct hash_entry *entry =
364 _mesa_hash_table_search(var_info_map, var);
365 return entry ? entry->data : NULL;
366 }
367
368 static struct array_var_info *
369 get_array_deref_info(nir_deref_instr *deref,
370 struct hash_table *var_info_map,
371 nir_variable_mode modes)
372 {
373 if (!(deref->mode & modes))
374 return NULL;
375
376 return get_array_var_info(nir_deref_instr_get_variable(deref),
377 var_info_map);
378 }
379
380 static void
381 mark_array_deref_used(nir_deref_instr *deref,
382 struct hash_table *var_info_map,
383 nir_variable_mode modes,
384 void *mem_ctx)
385 {
386 struct array_var_info *info =
387 get_array_deref_info(deref, var_info_map, modes);
388 if (!info)
389 return;
390
391 nir_deref_path path;
392 nir_deref_path_init(&path, deref, mem_ctx);
393
394 /* Walk the path and look for indirects. If we have an array deref with an
395 * indirect, mark the given level as not being split.
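* For instance (hypothetical deref chain), accessing var[2][i] with a
* non-constant i leaves level 0 marked as split but clears the split flag
* on level 1.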
396 */
397 for (unsigned i = 0; i < info->num_levels; i++) {
398 nir_deref_instr *p = path.path[i + 1];
399 if (p->deref_type == nir_deref_type_array &&
400 !nir_src_is_const(p->arr.index))
401 info->levels[i].split = false;
402 }
403 }
404
405 static void
406 mark_array_usage_impl(nir_function_impl *impl,
407 struct hash_table *var_info_map,
408 nir_variable_mode modes,
409 void *mem_ctx)
410 {
411 nir_foreach_block(block, impl) {
412 nir_foreach_instr(instr, block) {
413 if (instr->type != nir_instr_type_intrinsic)
414 continue;
415
416 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
417 switch (intrin->intrinsic) {
418 case nir_intrinsic_copy_deref:
419 mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
420 var_info_map, modes, mem_ctx);
421 /* Fall Through */
422
423 case nir_intrinsic_load_deref:
424 case nir_intrinsic_store_deref:
425 mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
426 var_info_map, modes, mem_ctx);
427 break;
428
429 default:
430 break;
431 }
432 }
433 }
434 }
435
436 static void
437 create_split_array_vars(struct array_var_info *var_info,
438 unsigned level,
439 struct array_split *split,
440 const char *name,
441 nir_shader *shader,
442 nir_function_impl *impl,
443 void *mem_ctx)
444 {
445 while (level < var_info->num_levels && !var_info->levels[level].split) {
446 name = ralloc_asprintf(mem_ctx, "%s[*]", name);
447 level++;
448 }
449
450 if (level == var_info->num_levels) {
451 /* We add parens to the variable name so it looks like "(foo[2][*])" so
452 * that further derefs will look like "(foo[2][*])[ssa_6]"
453 */
454 name = ralloc_asprintf(mem_ctx, "(%s)", name);
455
456 nir_variable_mode mode = var_info->base_var->data.mode;
457 if (mode == nir_var_function_temp) {
458 split->var = nir_local_variable_create(impl,
459 var_info->split_var_type, name);
460 } else {
461 split->var = nir_variable_create(shader, mode,
462 var_info->split_var_type, name);
463 }
464 } else {
465 assert(var_info->levels[level].split);
466 split->num_splits = var_info->levels[level].array_len;
467 split->splits = rzalloc_array(mem_ctx, struct array_split,
468 split->num_splits);
469 for (unsigned i = 0; i < split->num_splits; i++) {
470 create_split_array_vars(var_info, level + 1, &split->splits[i],
471 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
472 shader, impl, mem_ctx);
473 }
474 }
475 }
476
477 static bool
478 split_var_list_arrays(nir_shader *shader,
479 nir_function_impl *impl,
480 struct exec_list *vars,
481 struct hash_table *var_info_map,
482 void *mem_ctx)
483 {
484 struct exec_list split_vars;
485 exec_list_make_empty(&split_vars);
486
487 nir_foreach_variable_safe(var, vars) {
488 struct array_var_info *info = get_array_var_info(var, var_info_map);
489 if (!info)
490 continue;
491
492 bool has_split = false;
493 const struct glsl_type *split_type =
494 glsl_without_array_or_matrix(var->type);
495 for (int i = info->num_levels - 1; i >= 0; i--) {
496 if (info->levels[i].split) {
497 has_split = true;
498 continue;
499 }
500
501 /* If the original type was a matrix type, we'd like to keep that so
502 * we don't convert matrices into arrays.
503 */
504 if (i == info->num_levels - 1 &&
505 glsl_type_is_matrix(glsl_without_array(var->type))) {
506 split_type = glsl_matrix_type(glsl_get_base_type(split_type),
507 glsl_get_components(split_type),
508 info->levels[i].array_len);
509 } else {
510 split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
511 }
512 }
513
514 if (has_split) {
515 info->split_var_type = split_type;
516 /* To avoid list confusion (we'll be adding things as we split
517 * variables), pull all of the variables we plan to split off of the
518 * main variable list.
519 */
520 exec_node_remove(&var->node);
521 exec_list_push_tail(&split_vars, &var->node);
522 } else {
523 assert(split_type == var->type);
524 /* If we're not modifying this variable, delete the info so we skip
525 * it faster in later passes.
526 */
527 _mesa_hash_table_remove_key(var_info_map, var);
528 }
529 }
530
531 nir_foreach_variable(var, &split_vars) {
532 struct array_var_info *info = get_array_var_info(var, var_info_map);
533 create_split_array_vars(info, 0, &info->root_split, var->name,
534 shader, impl, mem_ctx);
535 }
536
537 return !exec_list_is_empty(&split_vars);
538 }
539
540 static bool
541 deref_has_split_wildcard(nir_deref_path *path,
542 struct array_var_info *info)
543 {
544 if (info == NULL)
545 return false;
546
547 assert(path->path[0]->var == info->base_var);
548 for (unsigned i = 0; i < info->num_levels; i++) {
549 if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
550 info->levels[i].split)
551 return true;
552 }
553
554 return false;
555 }
556
557 static bool
558 array_path_is_out_of_bounds(nir_deref_path *path,
559 struct array_var_info *info)
560 {
561 if (info == NULL)
562 return false;
563
564 assert(path->path[0]->var == info->base_var);
565 for (unsigned i = 0; i < info->num_levels; i++) {
566 nir_deref_instr *p = path->path[i + 1];
567 if (p->deref_type == nir_deref_type_array_wildcard)
568 continue;
569
570 if (nir_src_is_const(p->arr.index) &&
571 nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
572 return true;
573 }
574
575 return false;
576 }
577
578 static void
579 emit_split_copies(nir_builder *b,
580 struct array_var_info *dst_info, nir_deref_path *dst_path,
581 unsigned dst_level, nir_deref_instr *dst,
582 struct array_var_info *src_info, nir_deref_path *src_path,
583 unsigned src_level, nir_deref_instr *src)
584 {
585 nir_deref_instr *dst_p, *src_p;
586
587 while ((dst_p = dst_path->path[dst_level + 1])) {
588 if (dst_p->deref_type == nir_deref_type_array_wildcard)
589 break;
590
591 dst = nir_build_deref_follower(b, dst, dst_p);
592 dst_level++;
593 }
594
595 while ((src_p = src_path->path[src_level + 1])) {
596 if (src_p->deref_type == nir_deref_type_array_wildcard)
597 break;
598
599 src = nir_build_deref_follower(b, src, src_p);
600 src_level++;
601 }
602
603 if (src_p == NULL || dst_p == NULL) {
604 assert(src_p == NULL && dst_p == NULL);
605 nir_copy_deref(b, dst, src);
606 } else {
607 assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
608 src_p->deref_type == nir_deref_type_array_wildcard);
609
610 if ((dst_info && dst_info->levels[dst_level].split) ||
611 (src_info && src_info->levels[src_level].split)) {
612 /* There are no indirects at this level on at least one of the source
613 * or the destination, so we are lowering it.
614 */
615 assert(glsl_get_length(dst_path->path[dst_level]->type) ==
616 glsl_get_length(src_path->path[src_level]->type));
617 unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
618 for (unsigned i = 0; i < len; i++) {
619 nir_ssa_def *idx = nir_imm_int(b, i);
620 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
621 nir_build_deref_array(b, dst, idx),
622 src_info, src_path, src_level + 1,
623 nir_build_deref_array(b, src, idx));
624 }
625 } else {
626 /* Neither side is being split so we just keep going */
627 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
628 nir_build_deref_array_wildcard(b, dst),
629 src_info, src_path, src_level + 1,
630 nir_build_deref_array_wildcard(b, src));
631 }
632 }
633 }
634
635 static void
636 split_array_copies_impl(nir_function_impl *impl,
637 struct hash_table *var_info_map,
638 nir_variable_mode modes,
639 void *mem_ctx)
640 {
641 nir_builder b;
642 nir_builder_init(&b, impl);
643
644 nir_foreach_block(block, impl) {
645 nir_foreach_instr_safe(instr, block) {
646 if (instr->type != nir_instr_type_intrinsic)
647 continue;
648
649 nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
650 if (copy->intrinsic != nir_intrinsic_copy_deref)
651 continue;
652
653 nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
654 nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
655
656 struct array_var_info *dst_info =
657 get_array_deref_info(dst_deref, var_info_map, modes);
658 struct array_var_info *src_info =
659 get_array_deref_info(src_deref, var_info_map, modes);
660
661 if (!src_info && !dst_info)
662 continue;
663
664 nir_deref_path dst_path, src_path;
665 nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
666 nir_deref_path_init(&src_path, src_deref, mem_ctx);
667
668 if (!deref_has_split_wildcard(&dst_path, dst_info) &&
669 !deref_has_split_wildcard(&src_path, src_info))
670 continue;
671
672 b.cursor = nir_instr_remove(&copy->instr);
673
674 emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
675 src_info, &src_path, 0, src_path.path[0]);
676 }
677 }
678 }
679
680 static void
681 split_array_access_impl(nir_function_impl *impl,
682 struct hash_table *var_info_map,
683 nir_variable_mode modes,
684 void *mem_ctx)
685 {
686 nir_builder b;
687 nir_builder_init(&b, impl);
688
689 nir_foreach_block(block, impl) {
690 nir_foreach_instr_safe(instr, block) {
691 if (instr->type == nir_instr_type_deref) {
692 /* Clean up any dead derefs we find lying around. They may refer
693 * to variables we're planning to split.
694 */
695 nir_deref_instr *deref = nir_instr_as_deref(instr);
696 if (deref->mode & modes)
697 nir_deref_instr_remove_if_unused(deref);
698 continue;
699 }
700
701 if (instr->type != nir_instr_type_intrinsic)
702 continue;
703
704 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
705 if (intrin->intrinsic != nir_intrinsic_load_deref &&
706 intrin->intrinsic != nir_intrinsic_store_deref &&
707 intrin->intrinsic != nir_intrinsic_copy_deref)
708 continue;
709
710 const unsigned num_derefs =
711 intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
712
713 for (unsigned d = 0; d < num_derefs; d++) {
714 nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
715
716 struct array_var_info *info =
717 get_array_deref_info(deref, var_info_map, modes);
718 if (!info)
719 continue;
720
721 nir_deref_path path;
722 nir_deref_path_init(&path, deref, mem_ctx);
723
724 b.cursor = nir_before_instr(&intrin->instr);
725
726 if (array_path_is_out_of_bounds(&path, info)) {
727 /* If one of the derefs is out-of-bounds, we just delete the
728 * instruction. If a destination is out of bounds, then it may
729 * have been in-bounds prior to shrinking so we don't want to
730 * accidentally stomp something. However, we've already proven
731 * that it will never be read so it's safe to delete. If a
732 * source is out of bounds then it is loading random garbage.
733 * For loads, we replace their uses with an undef instruction
734 * and for copies we just delete the copy since it was writing
735 * undefined garbage anyway and we may as well leave the random
736 * garbage in the destination alone.
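* (Illustration with made-up names: given vec4 foo[2], a store to foo[5]
* is simply removed, while a load from foo[5] first has its uses replaced
* with an undef.)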
737 */
738 if (intrin->intrinsic == nir_intrinsic_load_deref) {
739 nir_ssa_def *u =
740 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
741 intrin->dest.ssa.bit_size);
742 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
743 nir_src_for_ssa(u));
744 }
745 nir_instr_remove(&intrin->instr);
746 for (unsigned i = 0; i < num_derefs; i++)
747 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
748 break;
749 }
750
751 struct array_split *split = &info->root_split;
752 for (unsigned i = 0; i < info->num_levels; i++) {
753 if (info->levels[i].split) {
754 nir_deref_instr *p = path.path[i + 1];
755 unsigned index = nir_src_as_uint(p->arr.index);
756 assert(index < info->levels[i].array_len);
757 split = &split->splits[index];
758 }
759 }
760 assert(!split->splits && split->var);
761
762 nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
763 for (unsigned i = 0; i < info->num_levels; i++) {
764 if (!info->levels[i].split) {
765 new_deref = nir_build_deref_follower(&b, new_deref,
766 path.path[i + 1]);
767 }
768 }
769 assert(new_deref->type == deref->type);
770
771 /* Rewrite the deref source to point to the split one */
772 nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
773 nir_src_for_ssa(&new_deref->dest.ssa));
774 nir_deref_instr_remove_if_unused(deref);
775 }
776 }
777 }
778 }
779
780 /** A pass for splitting arrays of vectors into multiple variables
781 *
782 * This pass looks at arrays (possibly multiple levels) of vectors (not
783 * structures or other types) and tries to split them into piles of variables,
784 * one for each array element. The heuristic used is simple: If a given array
785 * level is never used with an indirect, that array level will get split.
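*
* As a sketch (hypothetical shader, assumed names): given
*
*    vec4 a[3][2];
*
* where the outer level is only ever indexed with constants but the inner
* level sometimes sees an indirect index, the outer level is split and the
* inner level is kept, yielding three vec4[2] variables which this pass
* names "(a[0][*])", "(a[1][*])" and "(a[2][*])".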
786 *
787 * This pass probably could handle structures easily enough, but making a pass
788 * that could see through an array of structures of arrays would be difficult,
789 * so it's best to just run nir_split_struct_vars first.
790 */
791 bool
792 nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
793 {
794 void *mem_ctx = ralloc_context(NULL);
795 struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
796
797 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
798
799 bool has_global_array = false;
800 if (modes & nir_var_shader_temp) {
801 has_global_array = init_var_list_array_infos(&shader->globals,
802 var_info_map, mem_ctx);
803 }
804
805 bool has_any_array = false;
806 nir_foreach_function(function, shader) {
807 if (!function->impl)
808 continue;
809
810 bool has_local_array = false;
811 if (modes & nir_var_function_temp) {
812 has_local_array = init_var_list_array_infos(&function->impl->locals,
813 var_info_map, mem_ctx);
814 }
815
816 if (has_global_array || has_local_array) {
817 has_any_array = true;
818 mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
819 }
820 }
821
822 /* If we failed to find any arrays of vectors, bail early. */
823 if (!has_any_array) {
824 ralloc_free(mem_ctx);
825 return false;
826 }
827
828 bool has_global_splits = false;
829 if (modes & nir_var_shader_temp) {
830 has_global_splits = split_var_list_arrays(shader, NULL,
831 &shader->globals,
832 var_info_map, mem_ctx);
833 }
834
835 bool progress = false;
836 nir_foreach_function(function, shader) {
837 if (!function->impl)
838 continue;
839
840 bool has_local_splits = false;
841 if (modes & nir_var_function_temp) {
842 has_local_splits = split_var_list_arrays(shader, function->impl,
843 &function->impl->locals,
844 var_info_map, mem_ctx);
845 }
846
847 if (has_global_splits || has_local_splits) {
848 split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
849 split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
850
851 nir_metadata_preserve(function->impl, nir_metadata_block_index |
852 nir_metadata_dominance);
853 progress = true;
854 }
855 }
856
857 ralloc_free(mem_ctx);
858
859 return progress;
860 }
861
862 struct array_level_usage {
863 unsigned array_len;
864
865 /* The value UINT_MAX will be used to indicate an indirect */
866 unsigned max_read;
867 unsigned max_written;
868
869 /* True if there is a copy that isn't to/from a shrinkable array */
870 bool has_external_copy;
871 struct set *levels_copied;
872 };
873
874 struct vec_var_usage {
875 /* Convenience set of all components this variable has */
876 nir_component_mask_t all_comps;
877
878 nir_component_mask_t comps_read;
879 nir_component_mask_t comps_written;
880
881 nir_component_mask_t comps_kept;
882
883 /* True if there is a copy that isn't to/from a shrinkable vector */
884 bool has_external_copy;
885 struct set *vars_copied;
886
887 unsigned num_levels;
888 struct array_level_usage levels[0];
889 };
890
891 static struct vec_var_usage *
892 get_vec_var_usage(nir_variable *var,
893 struct hash_table *var_usage_map,
894 bool add_usage_entry, void *mem_ctx)
895 {
896 struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
897 if (entry)
898 return entry->data;
899
900 if (!add_usage_entry)
901 return NULL;
902
903 /* Check to make sure that we are working with an array of vectors. We
904 * don't bother to shrink single vectors because we figure that we can
905 * clean them up better with SSA than by inserting piles of vecN instructions
906 * to compact results.
907 */
908 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
909 if (num_levels < 1)
910 return NULL; /* Not an array of vectors */
911
912 struct vec_var_usage *usage =
913 rzalloc_size(mem_ctx, sizeof(*usage) +
914 num_levels * sizeof(usage->levels[0]));
915
916 usage->num_levels = num_levels;
917 const struct glsl_type *type = var->type;
918 for (unsigned i = 0; i < num_levels; i++) {
919 usage->levels[i].array_len = glsl_get_length(type);
920 assert(glsl_get_explicit_stride(type) == 0);
921 type = glsl_get_array_element(type);
922 }
923 assert(glsl_type_is_vector_or_scalar(type));
924
925 usage->all_comps = (1 << glsl_get_components(type)) - 1;
926
927 _mesa_hash_table_insert(var_usage_map, var, usage);
928
929 return usage;
930 }
931
932 static struct vec_var_usage *
933 get_vec_deref_usage(nir_deref_instr *deref,
934 struct hash_table *var_usage_map,
935 nir_variable_mode modes,
936 bool add_usage_entry, void *mem_ctx)
937 {
938 if (!(deref->mode & modes))
939 return NULL;
940
941 return get_vec_var_usage(nir_deref_instr_get_variable(deref),
942 var_usage_map, add_usage_entry, mem_ctx);
943 }
944
945 static void
946 mark_deref_used(nir_deref_instr *deref,
947 nir_component_mask_t comps_read,
948 nir_component_mask_t comps_written,
949 nir_deref_instr *copy_deref,
950 struct hash_table *var_usage_map,
951 nir_variable_mode modes,
952 void *mem_ctx)
953 {
954 if (!(deref->mode & modes))
955 return;
956
957 nir_variable *var = nir_deref_instr_get_variable(deref);
958
959 struct vec_var_usage *usage =
960 get_vec_var_usage(var, var_usage_map, true, mem_ctx);
961 if (!usage)
962 return;
963
964 usage->comps_read |= comps_read & usage->all_comps;
965 usage->comps_written |= comps_written & usage->all_comps;
966
967 struct vec_var_usage *copy_usage = NULL;
968 if (copy_deref) {
969 copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
970 true, mem_ctx);
971 if (copy_usage) {
972 if (usage->vars_copied == NULL) {
973 usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
974 }
975 _mesa_set_add(usage->vars_copied, copy_usage);
976 } else {
977 usage->has_external_copy = true;
978 }
979 }
980
981 nir_deref_path path;
982 nir_deref_path_init(&path, deref, mem_ctx);
983
984 nir_deref_path copy_path;
985 if (copy_usage)
986 nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
987
988 unsigned copy_i = 0;
989 for (unsigned i = 0; i < usage->num_levels; i++) {
990 struct array_level_usage *level = &usage->levels[i];
991 nir_deref_instr *deref = path.path[i + 1];
992 assert(deref->deref_type == nir_deref_type_array ||
993 deref->deref_type == nir_deref_type_array_wildcard);
994
995 unsigned max_used;
996 if (deref->deref_type == nir_deref_type_array) {
997 max_used = nir_src_is_const(deref->arr.index) ?
998 nir_src_as_uint(deref->arr.index) : UINT_MAX;
999 } else {
1000 /* For wildcards, we read or wrote the whole thing. */
1001 assert(deref->deref_type == nir_deref_type_array_wildcard);
1002 max_used = level->array_len - 1;
1003
1004 if (copy_usage) {
1005 /* Match each wildcard level with the level on copy_usage */
1006 for (; copy_path.path[copy_i + 1]; copy_i++) {
1007 if (copy_path.path[copy_i + 1]->deref_type ==
1008 nir_deref_type_array_wildcard)
1009 break;
1010 }
1011 struct array_level_usage *copy_level =
1012 &copy_usage->levels[copy_i++];
1013
1014 if (level->levels_copied == NULL) {
1015 level->levels_copied = _mesa_pointer_set_create(mem_ctx);
1016 }
1017 _mesa_set_add(level->levels_copied, copy_level);
1018 } else {
1019 /* We have a wildcard and it comes from a variable we aren't
1020 * tracking; flag it and we'll know to not shorten this array.
1021 */
1022 level->has_external_copy = true;
1023 }
1024 }
1025
1026 if (comps_written)
1027 level->max_written = MAX2(level->max_written, max_used);
1028 if (comps_read)
1029 level->max_read = MAX2(level->max_read, max_used);
1030 }
1031 }
1032
1033 static bool
1034 src_is_load_deref(nir_src src, nir_src deref_src)
1035 {
1036 assert(src.is_ssa);
1037 assert(deref_src.is_ssa);
1038
1039 if (src.ssa->parent_instr->type != nir_instr_type_intrinsic)
1040 return false;
1041
1042 nir_intrinsic_instr *load = nir_instr_as_intrinsic(src.ssa->parent_instr);
1043 if (load->intrinsic != nir_intrinsic_load_deref)
1044 return false;
1045
1046 assert(load->src[0].is_ssa);
1047
1048 return load->src[0].ssa == deref_src.ssa;
1049 }
1050
1051 /* Returns all non-self-referential components of a store instruction. A
1052 * component is self-referential if it comes from the same component of a load
1053 * instruction on the same deref. If the only data in a particular component
1054 * of a variable came directly from that component then it's undefined. The
1055 * only way to get defined data into a component of a variable is for it to
1056 * get written there by something outside or from a different component.
1057 *
1058 * This is a fairly common pattern in shaders that come from either GLSL IR or
1059 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1060 * load-vec-store.
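*
* As a sketch (hypothetical GLSL and NIR-ish pseudocode), "foo.x = v;" on a
* vec4 foo typically reaches us as
*
*    %old = load_deref foo
*    store_deref foo, vec4(v, %old.y, %old.z, %old.w), wrmask=xyzw
*
* and this helper reports only the .x component as genuinely written.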
1061 */
1062 static nir_component_mask_t
1063 get_non_self_referential_store_comps(nir_intrinsic_instr *store)
1064 {
1065 nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1066
1067 assert(store->src[1].is_ssa);
1068 nir_instr *src_instr = store->src[1].ssa->parent_instr;
1069 if (src_instr->type != nir_instr_type_alu)
1070 return comps;
1071
1072 nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1073
1074 if (src_alu->op == nir_op_imov ||
1075 src_alu->op == nir_op_fmov) {
1076 /* If it's just a swizzle of a load from the same deref, discount any
1077 * channels that don't move in the swizzle.
1078 */
1079 if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1080 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1081 if (src_alu->src[0].swizzle[i] == i)
1082 comps &= ~(1u << i);
1083 }
1084 }
1085 } else if (src_alu->op == nir_op_vec2 ||
1086 src_alu->op == nir_op_vec3 ||
1087 src_alu->op == nir_op_vec4) {
1088 /* If it's a vec, discount any channels that are just loads from the
1089 * same deref put in the same spot.
1090 */
1091 for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1092 if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1093 src_alu->src[i].swizzle[0] == i)
1094 comps &= ~(1u << i);
1095 }
1096 }
1097
1098 return comps;
1099 }
1100
1101 static void
1102 find_used_components_impl(nir_function_impl *impl,
1103 struct hash_table *var_usage_map,
1104 nir_variable_mode modes,
1105 void *mem_ctx)
1106 {
1107 nir_foreach_block(block, impl) {
1108 nir_foreach_instr(instr, block) {
1109 if (instr->type != nir_instr_type_intrinsic)
1110 continue;
1111
1112 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1113 switch (intrin->intrinsic) {
1114 case nir_intrinsic_load_deref:
1115 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1116 nir_ssa_def_components_read(&intrin->dest.ssa), 0,
1117 NULL, var_usage_map, modes, mem_ctx);
1118 break;
1119
1120 case nir_intrinsic_store_deref:
1121 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1122 0, get_non_self_referential_store_comps(intrin),
1123 NULL, var_usage_map, modes, mem_ctx);
1124 break;
1125
1126 case nir_intrinsic_copy_deref: {
1127 /* Just mark everything used for copies. */
1128 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1129 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1130 mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1131 mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1132 break;
1133 }
1134
1135 default:
1136 break;
1137 }
1138 }
1139 }
1140 }
1141
1142 static bool
1143 shrink_vec_var_list(struct exec_list *vars,
1144 struct hash_table *var_usage_map)
1145 {
1146 /* Initialize the components kept field of each variable. This is the
1147 * AND of the components written and components read. If a component is
1148 * written but never read, it's dead. If it is read but never written,
1149 * then all values read are undefined garbage and we may as well not read
1150 * them.
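* (For example, if a vec4's .xy components are both read and written but .z
* is only ever written, comps_kept ends up as just .xy.)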
1151 *
1152 * The same logic applies to the array length. We make the array length
1153 * the minimum required length between read and write and plan to
1154 * discard any OOB access. The one exception here is indirect writes
1155 * because we don't know where they will land and we can't shrink an array
1156 * with indirect writes because previously in-bounds writes may become
1157 * out-of-bounds and have undefined behavior.
1158 *
1159 * Also, if we have a copy to/from something we can't shrink, we need
1160 * to leave components and array_len of any wildcards alone.
1161 */
1162 nir_foreach_variable(var, vars) {
1163 struct vec_var_usage *usage =
1164 get_vec_var_usage(var, var_usage_map, false, NULL);
1165 if (!usage)
1166 continue;
1167
1168 assert(usage->comps_kept == 0);
1169 if (usage->has_external_copy)
1170 usage->comps_kept = usage->all_comps;
1171 else
1172 usage->comps_kept = usage->comps_read & usage->comps_written;
1173
1174 for (unsigned i = 0; i < usage->num_levels; i++) {
1175 struct array_level_usage *level = &usage->levels[i];
1176 assert(level->array_len > 0);
1177
1178 if (level->max_written == UINT_MAX || level->has_external_copy)
1179 continue; /* Can't shrink */
1180
1181 unsigned max_used = MIN2(level->max_read, level->max_written);
1182 level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1183 }
1184 }
1185
1186 /* In order for variable copies to work, we have to have the same data type
1187 * on the source and the destination. In order to satisfy this, we run a
1188 * little fixed-point algorithm to transitively ensure that we get enough
1189 * components and array elements for this to hold for all copies.
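* For example, if variable A keeps only .xy but is copied to a variable B
* that keeps only .yz, both end up keeping .xyz so the copy still
* type-checks; array lengths are widened to the larger of the two in the
* same way.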
1190 */
1191 bool fp_progress;
1192 do {
1193 fp_progress = false;
1194 nir_foreach_variable(var, vars) {
1195 struct vec_var_usage *var_usage =
1196 get_vec_var_usage(var, var_usage_map, false, NULL);
1197 if (!var_usage || !var_usage->vars_copied)
1198 continue;
1199
1200 set_foreach(var_usage->vars_copied, copy_entry) {
1201 struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1202 if (copy_usage->comps_kept != var_usage->comps_kept) {
1203 nir_component_mask_t comps_kept =
1204 (var_usage->comps_kept | copy_usage->comps_kept);
1205 var_usage->comps_kept = comps_kept;
1206 copy_usage->comps_kept = comps_kept;
1207 fp_progress = true;
1208 }
1209 }
1210
1211 for (unsigned i = 0; i < var_usage->num_levels; i++) {
1212 struct array_level_usage *var_level = &var_usage->levels[i];
1213 if (!var_level->levels_copied)
1214 continue;
1215
1216 set_foreach(var_level->levels_copied, copy_entry) {
1217 struct array_level_usage *copy_level = (void *)copy_entry->key;
1218 if (var_level->array_len != copy_level->array_len) {
1219 unsigned array_len =
1220 MAX2(var_level->array_len, copy_level->array_len);
1221 var_level->array_len = array_len;
1222 copy_level->array_len = array_len;
1223 fp_progress = true;
1224 }
1225 }
1226 }
1227 }
1228 } while (fp_progress);
1229
1230 bool vars_shrunk = false;
1231 nir_foreach_variable_safe(var, vars) {
1232 struct vec_var_usage *usage =
1233 get_vec_var_usage(var, var_usage_map, false, NULL);
1234 if (!usage)
1235 continue;
1236
1237 bool shrunk = false;
1238 const struct glsl_type *vec_type = var->type;
1239 for (unsigned i = 0; i < usage->num_levels; i++) {
1240 /* If we've reduced the array to zero elements at some level, just
1241 * set comps_kept to 0 and delete the variable.
1242 */
1243 if (usage->levels[i].array_len == 0) {
1244 usage->comps_kept = 0;
1245 break;
1246 }
1247
1248 assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1249 if (usage->levels[i].array_len < glsl_get_length(vec_type))
1250 shrunk = true;
1251 vec_type = glsl_get_array_element(vec_type);
1252 }
1253 assert(glsl_type_is_vector_or_scalar(vec_type));
1254
1255 assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1256 if (usage->comps_kept != usage->all_comps)
1257 shrunk = true;
1258
1259 if (usage->comps_kept == 0) {
1260 /* This variable is dead, remove it */
1261 vars_shrunk = true;
1262 exec_node_remove(&var->node);
1263 continue;
1264 }
1265
1266 if (!shrunk) {
1267 /* This variable doesn't need to be shrunk. Remove it from the
1268 * hash table so later steps will ignore it.
1269 */
1270 _mesa_hash_table_remove_key(var_usage_map, var);
1271 continue;
1272 }
1273
1274 /* Build the new var type */
1275 unsigned new_num_comps = util_bitcount(usage->comps_kept);
1276 const struct glsl_type *new_type =
1277 glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1278 for (int i = usage->num_levels - 1; i >= 0; i--) {
1279 assert(usage->levels[i].array_len > 0);
1280 /* If the original type was a matrix type, we'd like to keep that so
1281 * we don't convert matrices into arrays.
1282 */
1283 if (i == usage->num_levels - 1 &&
1284 glsl_type_is_matrix(glsl_without_array(var->type)) &&
1285 new_num_comps > 1 && usage->levels[i].array_len > 1) {
1286 new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1287 new_num_comps,
1288 usage->levels[i].array_len);
1289 } else {
1290 new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
1291 }
1292 }
1293 var->type = new_type;
1294
1295 vars_shrunk = true;
1296 }
1297
1298 return vars_shrunk;
1299 }
1300
1301 static bool
1302 vec_deref_is_oob(nir_deref_instr *deref,
1303 struct vec_var_usage *usage)
1304 {
1305 nir_deref_path path;
1306 nir_deref_path_init(&path, deref, NULL);
1307
1308 bool oob = false;
1309 for (unsigned i = 0; i < usage->num_levels; i++) {
1310 nir_deref_instr *p = path.path[i + 1];
1311 if (p->deref_type == nir_deref_type_array_wildcard)
1312 continue;
1313
1314 if (nir_src_is_const(p->arr.index) &&
1315 nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1316 oob = true;
1317 break;
1318 }
1319 }
1320
1321 nir_deref_path_finish(&path);
1322
1323 return oob;
1324 }
1325
1326 static bool
1327 vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1328 struct hash_table *var_usage_map,
1329 nir_variable_mode modes)
1330 {
1331 struct vec_var_usage *usage =
1332 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1333 if (!usage)
1334 return false;
1335
1336 return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1337 }
1338
1339 static void
1340 shrink_vec_var_access_impl(nir_function_impl *impl,
1341 struct hash_table *var_usage_map,
1342 nir_variable_mode modes)
1343 {
1344 nir_builder b;
1345 nir_builder_init(&b, impl);
1346
1347 nir_foreach_block(block, impl) {
1348 nir_foreach_instr_safe(instr, block) {
1349 switch (instr->type) {
1350 case nir_instr_type_deref: {
1351 nir_deref_instr *deref = nir_instr_as_deref(instr);
1352 if (!(deref->mode & modes))
1353 break;
1354
1355 /* Clean up any dead derefs we find lying around. They may refer
1356 * to variables we've deleted.
1357 */
1358 if (nir_deref_instr_remove_if_unused(deref))
1359 break;
1360
1361 /* Update the type in the deref to keep the types consistent as
1362 * you walk down the chain. We don't need to check if this is one
1363 * of the derefs we're shrinking because this is a no-op if it
1364 * isn't. The worst that could happen is that we accidentally fix
1365 * an invalid deref.
1366 */
1367 if (deref->deref_type == nir_deref_type_var) {
1368 deref->type = deref->var->type;
1369 } else if (deref->deref_type == nir_deref_type_array ||
1370 deref->deref_type == nir_deref_type_array_wildcard) {
1371 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1372 assert(glsl_type_is_array(parent->type) ||
1373 glsl_type_is_matrix(parent->type));
1374 deref->type = glsl_get_array_element(parent->type);
1375 }
1376 break;
1377 }
1378
1379 case nir_instr_type_intrinsic: {
1380 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1381
1382 /* If we have a copy whose source or destination has been deleted
1383 * because we determined the variable was dead, then we just
1384 * delete the copy instruction. If the source variable was dead
1385 * then it was writing undefined garbage anyway and if it's the
1386 * destination variable that's dead then the write isn't needed.
1387 */
1388 if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1389 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1390 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1391 if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1392 vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1393 nir_instr_remove(&intrin->instr);
1394 nir_deref_instr_remove_if_unused(dst);
1395 nir_deref_instr_remove_if_unused(src);
1396 }
1397 continue;
1398 }
1399
1400 if (intrin->intrinsic != nir_intrinsic_load_deref &&
1401 intrin->intrinsic != nir_intrinsic_store_deref)
1402 continue;
1403
1404 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1405 if (!(deref->mode & modes))
1406 continue;
1407
1408 struct vec_var_usage *usage =
1409 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1410 if (!usage)
1411 continue;
1412
1413 if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1414 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1415 nir_ssa_def *u =
1416 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
1417 intrin->dest.ssa.bit_size);
1418 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
1419 nir_src_for_ssa(u));
1420 }
1421 nir_instr_remove(&intrin->instr);
1422 nir_deref_instr_remove_if_unused(deref);
1423 continue;
1424 }
1425
1426 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1427 b.cursor = nir_after_instr(&intrin->instr);
1428
1429 nir_ssa_def *undef =
1430 nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
1431 nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1432 unsigned c = 0;
1433 for (unsigned i = 0; i < intrin->num_components; i++) {
1434 if (usage->comps_kept & (1u << i))
1435 vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
1436 else
1437 vec_srcs[i] = undef;
1438 }
1439 nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1440
1441 nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
1442 nir_src_for_ssa(vec),
1443 vec->parent_instr);
1444
1445 /* The SSA def is now only used by the swizzle. It's safe to
1446 * shrink the number of components.
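* (For instance, if only the first and third of four components are kept,
* the load is shrunk to two components and the vec built above hands the
* old users (new.x, undef, new.y, undef).)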
1447 */
1448 assert(list_length(&intrin->dest.ssa.uses) == c);
1449 intrin->num_components = c;
1450 intrin->dest.ssa.num_components = c;
1451 } else {
1452 nir_component_mask_t write_mask =
1453 nir_intrinsic_write_mask(intrin);
1454
1455 unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1456 nir_component_mask_t new_write_mask = 0;
1457 unsigned c = 0;
1458 for (unsigned i = 0; i < intrin->num_components; i++) {
1459 if (usage->comps_kept & (1u << i)) {
1460 swizzle[c] = i;
1461 if (write_mask & (1u << i))
1462 new_write_mask |= 1u << c;
1463 c++;
1464 }
1465 }
1466
1467 b.cursor = nir_before_instr(&intrin->instr);
1468
1469 nir_ssa_def *swizzled =
1470 nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false);
1471
1472 /* Rewrite to use the compacted source */
1473 nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
1474 nir_src_for_ssa(swizzled));
1475 nir_intrinsic_set_write_mask(intrin, new_write_mask);
1476 intrin->num_components = c;
1477 }
1478 break;
1479 }
1480
1481 default:
1482 break;
1483 }
1484 }
1485 }
1486 }
1487
1488 static bool
1489 function_impl_has_vars_with_modes(nir_function_impl *impl,
1490 nir_variable_mode modes)
1491 {
1492 nir_shader *shader = impl->function->shader;
1493
1494 if ((modes & nir_var_shader_temp) && !exec_list_is_empty(&shader->globals))
1495 return true;
1496
1497 if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
1498 return true;
1499
1500 return false;
1501 }
1502
1503 /** Attempt to shrink arrays of vectors
1504 *
1505 * This pass looks at variables which contain a vector or an array (possibly
1506 * multiple dimensions) of vectors and attempts to lower to a smaller vector
1507 * or array. If the pass can prove that a component of a vector (or array of
1508 * vectors) is never really used, then that component will be removed.
1509 * Similarly, the pass attempts to shorten arrays based on what elements it
1510 * can prove are never read or never contain valid data.
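*
* As a rough example (hypothetical shader): a variable declared as
*
*    vec4 foo[8];
*
* where only the .x and .y components are ever both written and read, and
* which is only ever indexed with constants no larger than 2, would be
* shrunk to something equivalent to
*
*    vec2 foo[3];
*
* with every load and store rewritten to use the compacted components.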
1511 */
1512 bool
1513 nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1514 {
1515 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
1516
1517 void *mem_ctx = ralloc_context(NULL);
1518
1519 struct hash_table *var_usage_map =
1520 _mesa_pointer_hash_table_create(mem_ctx);
1521
1522 bool has_vars_to_shrink = false;
1523 nir_foreach_function(function, shader) {
1524 if (!function->impl)
1525 continue;
1526
1527 /* Don't even bother crawling the IR if we don't have any variables.
1528 * Given that this pass deletes any unused variables, it's likely that
1529 * we will be in this scenario eventually.
1530 */
1531 if (function_impl_has_vars_with_modes(function->impl, modes)) {
1532 has_vars_to_shrink = true;
1533 find_used_components_impl(function->impl, var_usage_map,
1534 modes, mem_ctx);
1535 }
1536 }
1537 if (!has_vars_to_shrink) {
1538 ralloc_free(mem_ctx);
1539 return false;
1540 }
1541
1542 bool globals_shrunk = false;
1543 if (modes & nir_var_shader_temp)
1544 globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map);
1545
1546 bool progress = false;
1547 nir_foreach_function(function, shader) {
1548 if (!function->impl)
1549 continue;
1550
1551 bool locals_shrunk = false;
1552 if (modes & nir_var_function_temp) {
1553 locals_shrunk = shrink_vec_var_list(&function->impl->locals,
1554 var_usage_map);
1555 }
1556
1557 if (globals_shrunk || locals_shrunk) {
1558 shrink_vec_var_access_impl(function->impl, var_usage_map, modes);
1559
1560 nir_metadata_preserve(function->impl, nir_metadata_block_index |
1561 nir_metadata_dominance);
1562 progress = true;
1563 }
1564 }
1565
1566 ralloc_free(mem_ctx);
1567
1568 return progress;
1569 }