nir: fix memleak in error path
[mesa.git] src/compiler/nir/nir_split_vars.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "nir_vla.h"
28
29 #include "util/set.h"
30 #include "util/u_math.h"
31
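/* Gather every variable that is used through a deref with a "complex" use
 * (see nir_deref_instr_has_complex_use), roughly meaning a use other than an
 * ordinary load/store/copy through the deref chain.  The splitting and
 * shrinking passes below skip such variables entirely.
 */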
32 static struct set *
33 get_complex_used_vars(nir_shader *shader, void *mem_ctx)
34 {
35 struct set *complex_vars = _mesa_pointer_set_create(mem_ctx);
36
37 nir_foreach_function(function, shader) {
38 if (!function->impl)
39 continue;
40
41 nir_foreach_block(block, function->impl) {
42 nir_foreach_instr(instr, block) {
43 if (instr->type != nir_instr_type_deref)
44 continue;
45
46 nir_deref_instr *deref = nir_instr_as_deref(instr);
47
48 /* We only need to consider var derefs because
49 * nir_deref_instr_has_complex_use is recursive.
50 */
51 if (deref->deref_type == nir_deref_type_var &&
52 nir_deref_instr_has_complex_use(deref))
53 _mesa_set_add(complex_vars, deref->var);
54 }
55 }
56 }
57
58 return complex_vars;
59 }
60
61 struct split_var_state {
62 void *mem_ctx;
63
64 nir_shader *shader;
65 nir_function_impl *impl;
66
67 nir_variable *base_var;
68 };
69
70 struct field {
71 struct field *parent;
72
73 const struct glsl_type *type;
74
75 unsigned num_fields;
76 struct field *fields;
77
78 nir_variable *var;
79 };
80
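/* Wrap "type" in the same array dimensions as "array_type".  For example
 * (illustrative), wrapping float in the array layout of vec4[3][2] yields
 * float[3][2].
 */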
81 static const struct glsl_type *
82 wrap_type_in_array(const struct glsl_type *type,
83 const struct glsl_type *array_type)
84 {
85 if (!glsl_type_is_array(array_type))
86 return type;
87
88 const struct glsl_type *elem_type =
89 wrap_type_in_array(type, glsl_get_array_element(array_type));
90 assert(glsl_get_explicit_stride(array_type) == 0);
91 return glsl_array_type(elem_type, glsl_get_length(array_type), 0);
92 }
93
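/* Count how many array (or matrix) levels wrap an innermost vector or
 * scalar type.  Returns -1 if the innermost type is something else (e.g. a
 * struct), i.e. the type is not an array of vectors.
 */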
94 static int
95 num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
96 {
97 int num_levels = 0;
98 while (true) {
99 if (glsl_type_is_array_or_matrix(type)) {
100 num_levels++;
101 type = glsl_get_array_element(type);
102 } else if (glsl_type_is_vector_or_scalar(type)) {
103 return num_levels;
104 } else {
105 /* Not an array of vectors */
106 return -1;
107 }
108 }
109 }
110
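/* Recursively build the field tree for a (possibly array-of-) struct type.
 * Each leaf (non-struct) field gets its own variable whose type is the leaf
 * type wrapped in every enclosing array level and whose name is built by
 * joining member names, e.g. (illustrative) "outer_inner_member".
 */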
111 static void
112 init_field_for_type(struct field *field, struct field *parent,
113 const struct glsl_type *type,
114 const char *name,
115 struct split_var_state *state)
116 {
117 *field = (struct field) {
118 .parent = parent,
119 .type = type,
120 };
121
122 const struct glsl_type *struct_type = glsl_without_array(type);
123 if (glsl_type_is_struct_or_ifc(struct_type)) {
124 field->num_fields = glsl_get_length(struct_type);
125 field->fields = ralloc_array(state->mem_ctx, struct field,
126 field->num_fields);
127 for (unsigned i = 0; i < field->num_fields; i++) {
128 char *field_name = NULL;
129 if (name) {
130 field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
131 glsl_get_struct_elem_name(struct_type, i));
132 } else {
133 field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
134 glsl_get_type_name(struct_type),
135 glsl_get_struct_elem_name(struct_type, i));
136 }
137 init_field_for_type(&field->fields[i], field,
138 glsl_get_struct_field(struct_type, i),
139 field_name, state);
140 }
141 } else {
142 const struct glsl_type *var_type = type;
143 for (struct field *f = field->parent; f; f = f->parent)
144 var_type = wrap_type_in_array(var_type, f->type);
145
146 nir_variable_mode mode = state->base_var->data.mode;
147 if (mode == nir_var_function_temp) {
148 field->var = nir_local_variable_create(state->impl, var_type, name);
149 } else {
150 field->var = nir_variable_create(state->shader, mode, var_type, name);
151 }
152 }
153 }
154
155 static bool
156 split_var_list_structs(nir_shader *shader,
157 nir_function_impl *impl,
158 struct exec_list *vars,
159 struct hash_table *var_field_map,
160 struct set **complex_vars,
161 void *mem_ctx)
162 {
163 struct split_var_state state = {
164 .mem_ctx = mem_ctx,
165 .shader = shader,
166 .impl = impl,
167 };
168
169 struct exec_list split_vars;
170 exec_list_make_empty(&split_vars);
171
172 /* To avoid list confusion (we'll be adding things as we split variables),
173 * pull all of the variables we plan to split off of the list
174 */
175 nir_foreach_variable_safe(var, vars) {
176 if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
177 continue;
178
179 if (*complex_vars == NULL)
180 *complex_vars = get_complex_used_vars(shader, mem_ctx);
181
182 /* We can't split a variable that's referenced with a deref that has any
183 * sort of complex usage.
184 */
185 if (_mesa_set_search(*complex_vars, var))
186 continue;
187
188 exec_node_remove(&var->node);
189 exec_list_push_tail(&split_vars, &var->node);
190 }
191
192 nir_foreach_variable(var, &split_vars) {
193 state.base_var = var;
194
195 struct field *root_field = ralloc(mem_ctx, struct field);
196 init_field_for_type(root_field, NULL, var->type, var->name, &state);
197 _mesa_hash_table_insert(var_field_map, var, root_field);
198 }
199
200 return !exec_list_is_empty(&split_vars);
201 }
202
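/* Rewrite every vector/scalar deref of a split struct variable to point at
 * the matching per-member variable, rebuilding the array portion of the
 * deref chain and dropping the struct portion.
 */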
203 static void
204 split_struct_derefs_impl(nir_function_impl *impl,
205 struct hash_table *var_field_map,
206 nir_variable_mode modes,
207 void *mem_ctx)
208 {
209 nir_builder b;
210 nir_builder_init(&b, impl);
211
212 nir_foreach_block(block, impl) {
213 nir_foreach_instr_safe(instr, block) {
214 if (instr->type != nir_instr_type_deref)
215 continue;
216
217 nir_deref_instr *deref = nir_instr_as_deref(instr);
218 if (!(deref->mode & modes))
219 continue;
220
221 /* Clean up any dead derefs we find lying around. They may refer to
222 * variables we're planning to split.
223 */
224 if (nir_deref_instr_remove_if_unused(deref))
225 continue;
226
227 if (!glsl_type_is_vector_or_scalar(deref->type))
228 continue;
229
230 nir_variable *base_var = nir_deref_instr_get_variable(deref);
231 struct hash_entry *entry =
232 _mesa_hash_table_search(var_field_map, base_var);
233 if (!entry)
234 continue;
235
236 struct field *root_field = entry->data;
237
238 nir_deref_path path;
239 nir_deref_path_init(&path, deref, mem_ctx);
240
241 struct field *tail_field = root_field;
242 for (unsigned i = 0; path.path[i]; i++) {
243 if (path.path[i]->deref_type != nir_deref_type_struct)
244 continue;
245
246 assert(i > 0);
247 assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type));
248 assert(path.path[i - 1]->type ==
249 glsl_without_array(tail_field->type));
250
251 tail_field = &tail_field->fields[path.path[i]->strct.index];
252 }
253 nir_variable *split_var = tail_field->var;
254
255 nir_deref_instr *new_deref = NULL;
256 for (unsigned i = 0; path.path[i]; i++) {
257 nir_deref_instr *p = path.path[i];
258 b.cursor = nir_after_instr(&p->instr);
259
260 switch (p->deref_type) {
261 case nir_deref_type_var:
262 assert(new_deref == NULL);
263 new_deref = nir_build_deref_var(&b, split_var);
264 break;
265
266 case nir_deref_type_array:
267 case nir_deref_type_array_wildcard:
268 new_deref = nir_build_deref_follower(&b, new_deref, p);
269 break;
270
271 case nir_deref_type_struct:
272 /* Nothing to do; we're splitting structs */
273 break;
274
275 default:
276 unreachable("Invalid deref type in path");
277 }
278 }
279
280 assert(new_deref->type == deref->type);
281 nir_ssa_def_rewrite_uses(&deref->dest.ssa,
282 nir_src_for_ssa(&new_deref->dest.ssa));
283 nir_deref_instr_remove_if_unused(deref);
284 }
285 }
286 }
287
288 /** A pass for splitting structs into multiple variables
289 *
290 * This pass splits arrays of structs into multiple variables, one for each
291 * (possibly nested) structure member. After this pass completes, no
292 * variables of the given mode will contain a struct type.
293 */
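/* As a rough illustration (GLSL-style, hypothetical names):
 *
 *    struct S { vec4 a; float b; };
 *    S s[4];
 *
 * becomes one variable per member, with the enclosing array kept:
 *
 *    vec4  s_a[4];
 *    float s_b[4];
 */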
294 bool
295 nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
296 {
297 void *mem_ctx = ralloc_context(NULL);
298 struct hash_table *var_field_map =
299 _mesa_pointer_hash_table_create(mem_ctx);
300 struct set *complex_vars = NULL;
301
302 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
303
304 bool has_global_splits = false;
305 if (modes & nir_var_shader_temp) {
306 has_global_splits = split_var_list_structs(shader, NULL,
307 &shader->globals,
308 var_field_map,
309 &complex_vars,
310 mem_ctx);
311 }
312
313 bool progress = false;
314 nir_foreach_function(function, shader) {
315 if (!function->impl)
316 continue;
317
318 bool has_local_splits = false;
319 if (modes & nir_var_function_temp) {
320 has_local_splits = split_var_list_structs(shader, function->impl,
321 &function->impl->locals,
322 var_field_map,
323 &complex_vars,
324 mem_ctx);
325 }
326
327 if (has_global_splits || has_local_splits) {
328 split_struct_derefs_impl(function->impl, var_field_map,
329 modes, mem_ctx);
330
331 nir_metadata_preserve(function->impl, nir_metadata_block_index |
332 nir_metadata_dominance);
333 progress = true;
334 }
335 }
336
337 ralloc_free(mem_ctx);
338
339 return progress;
340 }
341
342 struct array_level_info {
343 unsigned array_len;
344 bool split;
345 };
346
347 struct array_split {
348 /* Only set if this is the tail end of the splitting */
349 nir_variable *var;
350
351 unsigned num_splits;
352 struct array_split *splits;
353 };
354
355 struct array_var_info {
356 nir_variable *base_var;
357
358 const struct glsl_type *split_var_type;
359
360 bool split_var;
361 struct array_split root_split;
362
363 unsigned num_levels;
364 struct array_level_info levels[0];
365 };
366
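/* Allocate an array_var_info for every candidate array-of-vectors variable
 * in the given list.  Every array level starts out marked as split; levels
 * that are indexed with an indirect get demoted later by
 * mark_array_usage_impl().  Returns true if any candidate was found.
 */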
367 static bool
368 init_var_list_array_infos(nir_shader *shader,
369 struct exec_list *vars,
370 struct hash_table *var_info_map,
371 struct set **complex_vars,
372 void *mem_ctx)
373 {
374 bool has_array = false;
375
376 nir_foreach_variable(var, vars) {
377 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
378 if (num_levels <= 0)
379 continue;
380
381 if (*complex_vars == NULL)
382 *complex_vars = get_complex_used_vars(shader, mem_ctx);
383
384 /* We can't split a variable that's referenced with a deref that has any
385 * sort of complex usage.
386 */
387 if (_mesa_set_search(*complex_vars, var))
388 continue;
389
390 struct array_var_info *info =
391 rzalloc_size(mem_ctx, sizeof(*info) +
392 num_levels * sizeof(info->levels[0]));
393
394 info->base_var = var;
395 info->num_levels = num_levels;
396
397 const struct glsl_type *type = var->type;
398 for (int i = 0; i < num_levels; i++) {
399 info->levels[i].array_len = glsl_get_length(type);
400 type = glsl_get_array_element(type);
401
402 /* All levels start out as split */
403 info->levels[i].split = true;
404 }
405
406 _mesa_hash_table_insert(var_info_map, var, info);
407 has_array = true;
408 }
409
410 return has_array;
411 }
412
413 static struct array_var_info *
414 get_array_var_info(nir_variable *var,
415 struct hash_table *var_info_map)
416 {
417 struct hash_entry *entry =
418 _mesa_hash_table_search(var_info_map, var);
419 return entry ? entry->data : NULL;
420 }
421
422 static struct array_var_info *
423 get_array_deref_info(nir_deref_instr *deref,
424 struct hash_table *var_info_map,
425 nir_variable_mode modes)
426 {
427 if (!(deref->mode & modes))
428 return NULL;
429
430 return get_array_var_info(nir_deref_instr_get_variable(deref),
431 var_info_map);
432 }
433
434 static void
435 mark_array_deref_used(nir_deref_instr *deref,
436 struct hash_table *var_info_map,
437 nir_variable_mode modes,
438 void *mem_ctx)
439 {
440 struct array_var_info *info =
441 get_array_deref_info(deref, var_info_map, modes);
442 if (!info)
443 return;
444
445 nir_deref_path path;
446 nir_deref_path_init(&path, deref, mem_ctx);
447
448 /* Walk the path and look for indirects. If we have an array deref with an
449 * indirect, mark the given level as not being split.
450 */
451 for (unsigned i = 0; i < info->num_levels; i++) {
452 nir_deref_instr *p = path.path[i + 1];
453 if (p->deref_type == nir_deref_type_array &&
454 !nir_src_is_const(p->arr.index))
455 info->levels[i].split = false;
456 }
457 }
458
459 static void
460 mark_array_usage_impl(nir_function_impl *impl,
461 struct hash_table *var_info_map,
462 nir_variable_mode modes,
463 void *mem_ctx)
464 {
465 nir_foreach_block(block, impl) {
466 nir_foreach_instr(instr, block) {
467 if (instr->type != nir_instr_type_intrinsic)
468 continue;
469
470 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
471 switch (intrin->intrinsic) {
472 case nir_intrinsic_copy_deref:
473 mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
474 var_info_map, modes, mem_ctx);
475 /* Fall Through */
476
477 case nir_intrinsic_load_deref:
478 case nir_intrinsic_store_deref:
479 mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
480 var_info_map, modes, mem_ctx);
481 break;
482
483 default:
484 break;
485 }
486 }
487 }
488 }
489
490 static void
491 create_split_array_vars(struct array_var_info *var_info,
492 unsigned level,
493 struct array_split *split,
494 const char *name,
495 nir_shader *shader,
496 nir_function_impl *impl,
497 void *mem_ctx)
498 {
499 while (level < var_info->num_levels && !var_info->levels[level].split) {
500 name = ralloc_asprintf(mem_ctx, "%s[*]", name);
501 level++;
502 }
503
504 if (level == var_info->num_levels) {
505 /* We add parens to the variable name so it looks like "(foo[2][*])" so
506 * that further derefs will look like "(foo[2][*])[ssa_6]"
507 */
508 name = ralloc_asprintf(mem_ctx, "(%s)", name);
509
510 nir_variable_mode mode = var_info->base_var->data.mode;
511 if (mode == nir_var_function_temp) {
512 split->var = nir_local_variable_create(impl,
513 var_info->split_var_type, name);
514 } else {
515 split->var = nir_variable_create(shader, mode,
516 var_info->split_var_type, name);
517 }
518 } else {
519 assert(var_info->levels[level].split);
520 split->num_splits = var_info->levels[level].array_len;
521 split->splits = rzalloc_array(mem_ctx, struct array_split,
522 split->num_splits);
523 for (unsigned i = 0; i < split->num_splits; i++) {
524 create_split_array_vars(var_info, level + 1, &split->splits[i],
525 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
526 shader, impl, mem_ctx);
527 }
528 }
529 }
530
531 static bool
532 split_var_list_arrays(nir_shader *shader,
533 nir_function_impl *impl,
534 struct exec_list *vars,
535 struct hash_table *var_info_map,
536 void *mem_ctx)
537 {
538 struct exec_list split_vars;
539 exec_list_make_empty(&split_vars);
540
541 nir_foreach_variable_safe(var, vars) {
542 struct array_var_info *info = get_array_var_info(var, var_info_map);
543 if (!info)
544 continue;
545
546 bool has_split = false;
547 const struct glsl_type *split_type =
548 glsl_without_array_or_matrix(var->type);
549 for (int i = info->num_levels - 1; i >= 0; i--) {
550 if (info->levels[i].split) {
551 has_split = true;
552 continue;
553 }
554
555 /* If the original type was a matrix type, we'd like to keep that so
556 * we don't convert matrices into arrays.
557 */
558 if (i == info->num_levels - 1 &&
559 glsl_type_is_matrix(glsl_without_array(var->type))) {
560 split_type = glsl_matrix_type(glsl_get_base_type(split_type),
561 glsl_get_components(split_type),
562 info->levels[i].array_len);
563 } else {
564 split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
565 }
566 }
567
568 if (has_split) {
569 info->split_var_type = split_type;
570 /* To avoid list confusion (we'll be adding things as we split
571 * variables), pull all of the variables we plan to split off of the
572 * main variable list.
573 */
574 exec_node_remove(&var->node);
575 exec_list_push_tail(&split_vars, &var->node);
576 } else {
577 assert(split_type == glsl_get_bare_type(var->type));
578 /* If we're not modifying this variable, delete the info so we skip
579 * it faster in later passes.
580 */
581 _mesa_hash_table_remove_key(var_info_map, var);
582 }
583 }
584
585 nir_foreach_variable(var, &split_vars) {
586 struct array_var_info *info = get_array_var_info(var, var_info_map);
587 create_split_array_vars(info, 0, &info->root_split, var->name,
588 shader, impl, mem_ctx);
589 }
590
591 return !exec_list_is_empty(&split_vars);
592 }
593
594 static bool
595 deref_has_split_wildcard(nir_deref_path *path,
596 struct array_var_info *info)
597 {
598 if (info == NULL)
599 return false;
600
601 assert(path->path[0]->var == info->base_var);
602 for (unsigned i = 0; i < info->num_levels; i++) {
603 if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
604 info->levels[i].split)
605 return true;
606 }
607
608 return false;
609 }
610
611 static bool
612 array_path_is_out_of_bounds(nir_deref_path *path,
613 struct array_var_info *info)
614 {
615 if (info == NULL)
616 return false;
617
618 assert(path->path[0]->var == info->base_var);
619 for (unsigned i = 0; i < info->num_levels; i++) {
620 nir_deref_instr *p = path->path[i + 1];
621 if (p->deref_type == nir_deref_type_array_wildcard)
622 continue;
623
624 if (nir_src_is_const(p->arr.index) &&
625 nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
626 return true;
627 }
628
629 return false;
630 }
631
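/* Recursively expand a copy between (possibly split) array variables.  Both
 * deref paths are walked up to the next wildcard; if either side is split at
 * that level the wildcard is unrolled into per-element copies, otherwise
 * matching wildcard derefs are built on both sides and the walk continues.
 * The copy itself is emitted once both paths are exhausted.
 */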
632 static void
633 emit_split_copies(nir_builder *b,
634 struct array_var_info *dst_info, nir_deref_path *dst_path,
635 unsigned dst_level, nir_deref_instr *dst,
636 struct array_var_info *src_info, nir_deref_path *src_path,
637 unsigned src_level, nir_deref_instr *src)
638 {
639 nir_deref_instr *dst_p, *src_p;
640
641 while ((dst_p = dst_path->path[dst_level + 1])) {
642 if (dst_p->deref_type == nir_deref_type_array_wildcard)
643 break;
644
645 dst = nir_build_deref_follower(b, dst, dst_p);
646 dst_level++;
647 }
648
649 while ((src_p = src_path->path[src_level + 1])) {
650 if (src_p->deref_type == nir_deref_type_array_wildcard)
651 break;
652
653 src = nir_build_deref_follower(b, src, src_p);
654 src_level++;
655 }
656
657 if (src_p == NULL || dst_p == NULL) {
658 assert(src_p == NULL && dst_p == NULL);
659 nir_copy_deref(b, dst, src);
660 } else {
661 assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
662 src_p->deref_type == nir_deref_type_array_wildcard);
663
664 if ((dst_info && dst_info->levels[dst_level].split) ||
665 (src_info && src_info->levels[src_level].split)) {
666 /* At least one of the source and destination has no indirect at this
667 * level, so we unroll the wildcard into per-element copies.
668 */
669 assert(glsl_get_length(dst_path->path[dst_level]->type) ==
670 glsl_get_length(src_path->path[src_level]->type));
671 unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
672 for (unsigned i = 0; i < len; i++) {
673 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
674 nir_build_deref_array_imm(b, dst, i),
675 src_info, src_path, src_level + 1,
676 nir_build_deref_array_imm(b, src, i));
677 }
678 } else {
679 /* Neither side is being split so we just keep going */
680 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
681 nir_build_deref_array_wildcard(b, dst),
682 src_info, src_path, src_level + 1,
683 nir_build_deref_array_wildcard(b, src));
684 }
685 }
686 }
687
688 static void
689 split_array_copies_impl(nir_function_impl *impl,
690 struct hash_table *var_info_map,
691 nir_variable_mode modes,
692 void *mem_ctx)
693 {
694 nir_builder b;
695 nir_builder_init(&b, impl);
696
697 nir_foreach_block(block, impl) {
698 nir_foreach_instr_safe(instr, block) {
699 if (instr->type != nir_instr_type_intrinsic)
700 continue;
701
702 nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
703 if (copy->intrinsic != nir_intrinsic_copy_deref)
704 continue;
705
706 nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
707 nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
708
709 struct array_var_info *dst_info =
710 get_array_deref_info(dst_deref, var_info_map, modes);
711 struct array_var_info *src_info =
712 get_array_deref_info(src_deref, var_info_map, modes);
713
714 if (!src_info && !dst_info)
715 continue;
716
717 nir_deref_path dst_path, src_path;
718 nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
719 nir_deref_path_init(&src_path, src_deref, mem_ctx);
720
721 if (!deref_has_split_wildcard(&dst_path, dst_info) &&
722 !deref_has_split_wildcard(&src_path, src_info))
723 continue;
724
725 b.cursor = nir_instr_remove(&copy->instr);
726
727 emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
728 src_info, &src_path, 0, src_path.path[0]);
729 }
730 }
731 }
732
733 static void
734 split_array_access_impl(nir_function_impl *impl,
735 struct hash_table *var_info_map,
736 nir_variable_mode modes,
737 void *mem_ctx)
738 {
739 nir_builder b;
740 nir_builder_init(&b, impl);
741
742 nir_foreach_block(block, impl) {
743 nir_foreach_instr_safe(instr, block) {
744 if (instr->type == nir_instr_type_deref) {
745 /* Clean up any dead derefs we find lying around. They may refer
746 * to variables we're planning to split.
747 */
748 nir_deref_instr *deref = nir_instr_as_deref(instr);
749 if (deref->mode & modes)
750 nir_deref_instr_remove_if_unused(deref);
751 continue;
752 }
753
754 if (instr->type != nir_instr_type_intrinsic)
755 continue;
756
757 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
758 if (intrin->intrinsic != nir_intrinsic_load_deref &&
759 intrin->intrinsic != nir_intrinsic_store_deref &&
760 intrin->intrinsic != nir_intrinsic_copy_deref)
761 continue;
762
763 const unsigned num_derefs =
764 intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
765
766 for (unsigned d = 0; d < num_derefs; d++) {
767 nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
768
769 struct array_var_info *info =
770 get_array_deref_info(deref, var_info_map, modes);
771 if (!info)
772 continue;
773
774 nir_deref_path path;
775 nir_deref_path_init(&path, deref, mem_ctx);
776
777 b.cursor = nir_before_instr(&intrin->instr);
778
779 if (array_path_is_out_of_bounds(&path, info)) {
780 /* If one of the derefs is out-of-bounds, we just delete the
781 * instruction. If a destination is out of bounds, then it may
782 * have been in-bounds prior to shrinking so we don't want to
783 * accidentally stomp something. However, we've already proven
784 * that it will never be read so it's safe to delete. If a
785 * source is out of bounds then it is loading random garbage.
786 * For loads, we replace their uses with an undef instruction
787 * and for copies we just delete the copy since it was writing
788 * undefined garbage anyway and we may as well leave the random
789 * garbage in the destination alone.
790 */
791 if (intrin->intrinsic == nir_intrinsic_load_deref) {
792 nir_ssa_def *u =
793 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
794 intrin->dest.ssa.bit_size);
795 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
796 nir_src_for_ssa(u));
797 }
798 nir_instr_remove(&intrin->instr);
799 for (unsigned i = 0; i < num_derefs; i++)
800 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
801 break;
802 }
803
804 struct array_split *split = &info->root_split;
805 for (unsigned i = 0; i < info->num_levels; i++) {
806 if (info->levels[i].split) {
807 nir_deref_instr *p = path.path[i + 1];
808 unsigned index = nir_src_as_uint(p->arr.index);
809 assert(index < info->levels[i].array_len);
810 split = &split->splits[index];
811 }
812 }
813 assert(!split->splits && split->var);
814
815 nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
816 for (unsigned i = 0; i < info->num_levels; i++) {
817 if (!info->levels[i].split) {
818 new_deref = nir_build_deref_follower(&b, new_deref,
819 path.path[i + 1]);
820 }
821 }
822 assert(new_deref->type == deref->type);
823
824 /* Rewrite the deref source to point to the split one */
825 nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
826 nir_src_for_ssa(&new_deref->dest.ssa));
827 nir_deref_instr_remove_if_unused(deref);
828 }
829 }
830 }
831 }
832
833 /** A pass for splitting arrays of vectors into multiple variables
834 *
835 * This pass looks at arrays (possibly multiple levels) of vectors (not
836 * structures or other types) and tries to split them into piles of variables,
837 * one for each array element. The heuristic used is simple: If a given array
838 * level is never used with an indirect, that array level will get split.
839 *
840 * This pass could probably handle structures easily enough, but making a pass
841 * that could see through an array of structures of arrays would be difficult,
842 * so it's best to just run nir_split_struct_vars first.
843 */
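/* As a rough illustration (hypothetical shader): given "vec4 v[3][2]" where
 * the outer level is only ever indexed with constants but the inner level is
 * indexed indirectly, the outer level is split into three vec4[2] variables,
 * named roughly "(v[0][*])", "(v[1][*])" and "(v[2][*])", while the inner
 * level stays an array.
 */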
844 bool
845 nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
846 {
847 void *mem_ctx = ralloc_context(NULL);
848 struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
849 struct set *complex_vars = NULL;
850
851 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
852
853 bool has_global_array = false;
854 if (modes & nir_var_shader_temp) {
855 has_global_array = init_var_list_array_infos(shader,
856 &shader->globals,
857 var_info_map,
858 &complex_vars,
859 mem_ctx);
860 }
861
862 bool has_any_array = false;
863 nir_foreach_function(function, shader) {
864 if (!function->impl)
865 continue;
866
867 bool has_local_array = false;
868 if (modes & nir_var_function_temp) {
869 has_local_array = init_var_list_array_infos(shader,
870 &function->impl->locals,
871 var_info_map,
872 &complex_vars,
873 mem_ctx);
874 }
875
876 if (has_global_array || has_local_array) {
877 has_any_array = true;
878 mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
879 }
880 }
881
882 /* If we failed to find any arrays to split, bail early. */
883 if (!has_any_array) {
884 ralloc_free(mem_ctx);
885 return false;
886 }
887
888 bool has_global_splits = false;
889 if (modes & nir_var_shader_temp) {
890 has_global_splits = split_var_list_arrays(shader, NULL,
891 &shader->globals,
892 var_info_map, mem_ctx);
893 }
894
895 bool progress = false;
896 nir_foreach_function(function, shader) {
897 if (!function->impl)
898 continue;
899
900 bool has_local_splits = false;
901 if (modes & nir_var_function_temp) {
902 has_local_splits = split_var_list_arrays(shader, function->impl,
903 &function->impl->locals,
904 var_info_map, mem_ctx);
905 }
906
907 if (has_global_splits || has_local_splits) {
908 split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
909 split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
910
911 nir_metadata_preserve(function->impl, nir_metadata_block_index |
912 nir_metadata_dominance);
913 progress = true;
914 }
915 }
916
917 ralloc_free(mem_ctx);
918
919 return progress;
920 }
921
922 struct array_level_usage {
923 unsigned array_len;
924
925 /* The value UINT_MAX will be used to indicate an indirect */
926 unsigned max_read;
927 unsigned max_written;
928
929 /* True if there is a copy that isn't to/from a shrinkable array */
930 bool has_external_copy;
931 struct set *levels_copied;
932 };
933
934 struct vec_var_usage {
935 /* Convenience set of all components this variable has */
936 nir_component_mask_t all_comps;
937
938 nir_component_mask_t comps_read;
939 nir_component_mask_t comps_written;
940
941 nir_component_mask_t comps_kept;
942
943 /* True if there is a copy that isn't to/from a shrinkable vector */
944 bool has_external_copy;
945 bool has_complex_use;
946 struct set *vars_copied;
947
948 unsigned num_levels;
949 struct array_level_usage levels[0];
950 };
951
952 static struct vec_var_usage *
953 get_vec_var_usage(nir_variable *var,
954 struct hash_table *var_usage_map,
955 bool add_usage_entry, void *mem_ctx)
956 {
957 struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
958 if (entry)
959 return entry->data;
960
961 if (!add_usage_entry)
962 return NULL;
963
964 /* Check to make sure that we are working with an array of vectors. We
965 * don't bother to shrink single vectors because we figure that we can
966 * clean it up better with SSA than by inserting piles of vecN instructions
967 * to compact results.
968 */
969 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
970 if (num_levels < 1)
971 return NULL; /* Not an array of vectors */
972
973 struct vec_var_usage *usage =
974 rzalloc_size(mem_ctx, sizeof(*usage) +
975 num_levels * sizeof(usage->levels[0]));
976
977 usage->num_levels = num_levels;
978 const struct glsl_type *type = var->type;
979 for (unsigned i = 0; i < num_levels; i++) {
980 usage->levels[i].array_len = glsl_get_length(type);
981 type = glsl_get_array_element(type);
982 }
983 assert(glsl_type_is_vector_or_scalar(type));
984
985 usage->all_comps = (1 << glsl_get_components(type)) - 1;
986
987 _mesa_hash_table_insert(var_usage_map, var, usage);
988
989 return usage;
990 }
991
992 static struct vec_var_usage *
993 get_vec_deref_usage(nir_deref_instr *deref,
994 struct hash_table *var_usage_map,
995 nir_variable_mode modes,
996 bool add_usage_entry, void *mem_ctx)
997 {
998 if (!(deref->mode & modes))
999 return NULL;
1000
1001 return get_vec_var_usage(nir_deref_instr_get_variable(deref),
1002 var_usage_map, add_usage_entry, mem_ctx);
1003 }
1004
1005 static void
1006 mark_deref_if_complex(nir_deref_instr *deref,
1007 struct hash_table *var_usage_map,
1008 nir_variable_mode modes,
1009 void *mem_ctx)
1010 {
1011 if (!(deref->mode & modes))
1012 return;
1013
1014 /* Only bother with var derefs because nir_deref_instr_has_complex_use is
1015 * recursive.
1016 */
1017 if (deref->deref_type != nir_deref_type_var)
1018 return;
1019
1020 if (!nir_deref_instr_has_complex_use(deref))
1021 return;
1022
1023 struct vec_var_usage *usage =
1024 get_vec_var_usage(deref->var, var_usage_map, true, mem_ctx);
1025 if (!usage)
1026 return;
1027
1028 usage->has_complex_use = true;
1029 }
1030
1031 static void
1032 mark_deref_used(nir_deref_instr *deref,
1033 nir_component_mask_t comps_read,
1034 nir_component_mask_t comps_written,
1035 nir_deref_instr *copy_deref,
1036 struct hash_table *var_usage_map,
1037 nir_variable_mode modes,
1038 void *mem_ctx)
1039 {
1040 if (!(deref->mode & modes))
1041 return;
1042
1043 nir_variable *var = nir_deref_instr_get_variable(deref);
1044 if (var == NULL)
1045 return;
1046
1047 struct vec_var_usage *usage =
1048 get_vec_var_usage(var, var_usage_map, true, mem_ctx);
1049 if (!usage)
1050 return;
1051
1052 usage->comps_read |= comps_read & usage->all_comps;
1053 usage->comps_written |= comps_written & usage->all_comps;
1054
1055 struct vec_var_usage *copy_usage = NULL;
1056 if (copy_deref) {
1057 copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
1058 true, mem_ctx);
1059 if (copy_usage) {
1060 if (usage->vars_copied == NULL) {
1061 usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
1062 }
1063 _mesa_set_add(usage->vars_copied, copy_usage);
1064 } else {
1065 usage->has_external_copy = true;
1066 }
1067 }
1068
1069 nir_deref_path path;
1070 nir_deref_path_init(&path, deref, mem_ctx);
1071
1072 nir_deref_path copy_path;
1073 if (copy_usage)
1074 nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
1075
1076 unsigned copy_i = 0;
1077 for (unsigned i = 0; i < usage->num_levels; i++) {
1078 struct array_level_usage *level = &usage->levels[i];
1079 nir_deref_instr *deref = path.path[i + 1];
1080 assert(deref->deref_type == nir_deref_type_array ||
1081 deref->deref_type == nir_deref_type_array_wildcard);
1082
1083 unsigned max_used;
1084 if (deref->deref_type == nir_deref_type_array) {
1085 max_used = nir_src_is_const(deref->arr.index) ?
1086 nir_src_as_uint(deref->arr.index) : UINT_MAX;
1087 } else {
1088 /* For wildcards, we read or wrote the whole thing. */
1089 assert(deref->deref_type == nir_deref_type_array_wildcard);
1090 max_used = level->array_len - 1;
1091
1092 if (copy_usage) {
1093 /* Match each wildcard level with the level on copy_usage */
1094 for (; copy_path.path[copy_i + 1]; copy_i++) {
1095 if (copy_path.path[copy_i + 1]->deref_type ==
1096 nir_deref_type_array_wildcard)
1097 break;
1098 }
1099 struct array_level_usage *copy_level =
1100 &copy_usage->levels[copy_i++];
1101
1102 if (level->levels_copied == NULL) {
1103 level->levels_copied = _mesa_pointer_set_create(mem_ctx);
1104 }
1105 _mesa_set_add(level->levels_copied, copy_level);
1106 } else {
1107 /* We have a wildcard and it comes from a variable we aren't
1108 * tracking; flag it and we'll know to not shorten this array.
1109 */
1110 level->has_external_copy = true;
1111 }
1112 }
1113
1114 if (comps_written)
1115 level->max_written = MAX2(level->max_written, max_used);
1116 if (comps_read)
1117 level->max_read = MAX2(level->max_read, max_used);
1118 }
1119 }
1120
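/* Return true if "src" is the result of a load_deref from the same deref
 * SSA value as "deref_src".
 */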
1121 static bool
1122 src_is_load_deref(nir_src src, nir_src deref_src)
1123 {
1124 nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
1125 if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
1126 return false;
1127
1128 assert(load->src[0].is_ssa);
1129
1130 return load->src[0].ssa == deref_src.ssa;
1131 }
1132
1133 /* Returns all non-self-referential components of a store instruction. A
1134 * component is self-referential if it comes from the same component of a load
1135 * instruction on the same deref. If the only data in a particular component
1136 * of a variable came directly from that component then it's undefined. The
1137 * only way to get defined data into a component of a variable is for it to
1138 * get written there by something outside or from a different component.
1139 *
1140 * This is a fairly common pattern in shaders that come from either GLSL IR or
1141 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1142 * load-vec-store.
1143 */
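/* As a rough illustration: a write-masked "v.y = x" often reaches NIR as
 *
 *    vec3 t = load_deref(v);
 *    store_deref(v, vec3(t.x, x, t.z), write_mask = 0x7);
 *
 * Components 0 and 2 only copy v back onto itself, so this helper reports
 * only component 1 as really written.
 */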
1144 static nir_component_mask_t
1145 get_non_self_referential_store_comps(nir_intrinsic_instr *store)
1146 {
1147 nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1148
1149 assert(store->src[1].is_ssa);
1150 nir_instr *src_instr = store->src[1].ssa->parent_instr;
1151 if (src_instr->type != nir_instr_type_alu)
1152 return comps;
1153
1154 nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1155
1156 if (src_alu->op == nir_op_mov) {
1157 /* If it's just a swizzle of a load from the same deref, discount any
1158 * channels that don't move in the swizzle.
1159 */
1160 if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1161 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1162 if (src_alu->src[0].swizzle[i] == i)
1163 comps &= ~(1u << i);
1164 }
1165 }
1166 } else if (src_alu->op == nir_op_vec2 ||
1167 src_alu->op == nir_op_vec3 ||
1168 src_alu->op == nir_op_vec4) {
1169 /* If it's a vec, discount any channels that are just loads from the
1170 * same deref put in the same spot.
1171 */
1172 for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1173 if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1174 src_alu->src[i].swizzle[0] == i)
1175 comps &= ~(1u << i);
1176 }
1177 }
1178
1179 return comps;
1180 }
1181
1182 static void
1183 find_used_components_impl(nir_function_impl *impl,
1184 struct hash_table *var_usage_map,
1185 nir_variable_mode modes,
1186 void *mem_ctx)
1187 {
1188 nir_foreach_block(block, impl) {
1189 nir_foreach_instr(instr, block) {
1190 if (instr->type == nir_instr_type_deref) {
1191 mark_deref_if_complex(nir_instr_as_deref(instr),
1192 var_usage_map, modes, mem_ctx);
1193 }
1194
1195 if (instr->type != nir_instr_type_intrinsic)
1196 continue;
1197
1198 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1199 switch (intrin->intrinsic) {
1200 case nir_intrinsic_load_deref:
1201 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1202 nir_ssa_def_components_read(&intrin->dest.ssa), 0,
1203 NULL, var_usage_map, modes, mem_ctx);
1204 break;
1205
1206 case nir_intrinsic_store_deref:
1207 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1208 0, get_non_self_referential_store_comps(intrin),
1209 NULL, var_usage_map, modes, mem_ctx);
1210 break;
1211
1212 case nir_intrinsic_copy_deref: {
1213 /* Just mark everything used for copies. */
1214 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1215 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1216 mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1217 mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1218 break;
1219 }
1220
1221 default:
1222 break;
1223 }
1224 }
1225 }
1226 }
1227
1228 static bool
1229 shrink_vec_var_list(struct exec_list *vars,
1230 struct hash_table *var_usage_map)
1231 {
1232 /* Initialize the components kept field of each variable. This is the
1233 * AND of the components written and components read. If a component is
1234 * written but never read, it's dead. If it is read but never written,
1235 * then all values read are undefined garbage and we may as well not read
1236 * them.
1237 *
1238 * The same logic applies to the array length. We make the array length
1239 * the minimum required length between reads and writes and plan to
1240 * discard any OOB access. The one exception here is indirect writes:
1241 * we don't know where they will land, and we can't shrink an array
1242 * with indirect writes because previously in-bounds writes may become
1243 * out-of-bounds and have undefined behavior.
1244 *
1245 * Also, if we have a copy to/from something we can't shrink, we need
1246 * to leave components and array_len of any wildcards alone.
1247 */
1248 nir_foreach_variable(var, vars) {
1249 struct vec_var_usage *usage =
1250 get_vec_var_usage(var, var_usage_map, false, NULL);
1251 if (!usage)
1252 continue;
1253
1254 assert(usage->comps_kept == 0);
1255 if (usage->has_external_copy || usage->has_complex_use)
1256 usage->comps_kept = usage->all_comps;
1257 else
1258 usage->comps_kept = usage->comps_read & usage->comps_written;
1259
1260 for (unsigned i = 0; i < usage->num_levels; i++) {
1261 struct array_level_usage *level = &usage->levels[i];
1262 assert(level->array_len > 0);
1263
1264 if (level->max_written == UINT_MAX || level->has_external_copy ||
1265 usage->has_complex_use)
1266 continue; /* Can't shrink */
1267
1268 unsigned max_used = MIN2(level->max_read, level->max_written);
1269 level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1270 }
1271 }
1272
1273 /* In order for variable copies to work, we have to have the same data type
1274 * on the source and the destination. In order to satisfy this, we run a
1275 * little fixed-point algorithm to transitively ensure that we get enough
1276 * components and array elements for this to hold for all copies.
1277 */
1278 bool fp_progress;
1279 do {
1280 fp_progress = false;
1281 nir_foreach_variable(var, vars) {
1282 struct vec_var_usage *var_usage =
1283 get_vec_var_usage(var, var_usage_map, false, NULL);
1284 if (!var_usage || !var_usage->vars_copied)
1285 continue;
1286
1287 set_foreach(var_usage->vars_copied, copy_entry) {
1288 struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1289 if (copy_usage->comps_kept != var_usage->comps_kept) {
1290 nir_component_mask_t comps_kept =
1291 (var_usage->comps_kept | copy_usage->comps_kept);
1292 var_usage->comps_kept = comps_kept;
1293 copy_usage->comps_kept = comps_kept;
1294 fp_progress = true;
1295 }
1296 }
1297
1298 for (unsigned i = 0; i < var_usage->num_levels; i++) {
1299 struct array_level_usage *var_level = &var_usage->levels[i];
1300 if (!var_level->levels_copied)
1301 continue;
1302
1303 set_foreach(var_level->levels_copied, copy_entry) {
1304 struct array_level_usage *copy_level = (void *)copy_entry->key;
1305 if (var_level->array_len != copy_level->array_len) {
1306 unsigned array_len =
1307 MAX2(var_level->array_len, copy_level->array_len);
1308 var_level->array_len = array_len;
1309 copy_level->array_len = array_len;
1310 fp_progress = true;
1311 }
1312 }
1313 }
1314 }
1315 } while (fp_progress);
1316
1317 bool vars_shrunk = false;
1318 nir_foreach_variable_safe(var, vars) {
1319 struct vec_var_usage *usage =
1320 get_vec_var_usage(var, var_usage_map, false, NULL);
1321 if (!usage)
1322 continue;
1323
1324 bool shrunk = false;
1325 const struct glsl_type *vec_type = var->type;
1326 for (unsigned i = 0; i < usage->num_levels; i++) {
1327 /* If we've reduced the array to zero elements at some level, just
1328 * set comps_kept to 0 and delete the variable.
1329 */
1330 if (usage->levels[i].array_len == 0) {
1331 usage->comps_kept = 0;
1332 break;
1333 }
1334
1335 assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1336 if (usage->levels[i].array_len < glsl_get_length(vec_type))
1337 shrunk = true;
1338 vec_type = glsl_get_array_element(vec_type);
1339 }
1340 assert(glsl_type_is_vector_or_scalar(vec_type));
1341
1342 assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1343 if (usage->comps_kept != usage->all_comps)
1344 shrunk = true;
1345
1346 if (usage->comps_kept == 0) {
1347 /* This variable is dead, remove it */
1348 vars_shrunk = true;
1349 exec_node_remove(&var->node);
1350 continue;
1351 }
1352
1353 if (!shrunk) {
1354 /* This variable doesn't need to be shrunk. Remove it from the
1355 * hash table so later steps will ignore it.
1356 */
1357 _mesa_hash_table_remove_key(var_usage_map, var);
1358 continue;
1359 }
1360
1361 /* Build the new var type */
1362 unsigned new_num_comps = util_bitcount(usage->comps_kept);
1363 const struct glsl_type *new_type =
1364 glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1365 for (int i = usage->num_levels - 1; i >= 0; i--) {
1366 assert(usage->levels[i].array_len > 0);
1367 /* If the original type was a matrix type, we'd like to keep that so
1368 * we don't convert matrices into arrays.
1369 */
1370 if (i == usage->num_levels - 1 &&
1371 glsl_type_is_matrix(glsl_without_array(var->type)) &&
1372 new_num_comps > 1 && usage->levels[i].array_len > 1) {
1373 new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1374 new_num_comps,
1375 usage->levels[i].array_len);
1376 } else {
1377 new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
1378 }
1379 }
1380 var->type = new_type;
1381
1382 vars_shrunk = true;
1383 }
1384
1385 return vars_shrunk;
1386 }
1387
1388 static bool
1389 vec_deref_is_oob(nir_deref_instr *deref,
1390 struct vec_var_usage *usage)
1391 {
1392 nir_deref_path path;
1393 nir_deref_path_init(&path, deref, NULL);
1394
1395 bool oob = false;
1396 for (unsigned i = 0; i < usage->num_levels; i++) {
1397 nir_deref_instr *p = path.path[i + 1];
1398 if (p->deref_type == nir_deref_type_array_wildcard)
1399 continue;
1400
1401 if (nir_src_is_const(p->arr.index) &&
1402 nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1403 oob = true;
1404 break;
1405 }
1406 }
1407
1408 nir_deref_path_finish(&path);
1409
1410 return oob;
1411 }
1412
1413 static bool
1414 vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1415 struct hash_table *var_usage_map,
1416 nir_variable_mode modes)
1417 {
1418 struct vec_var_usage *usage =
1419 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1420 if (!usage)
1421 return false;
1422
1423 return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1424 }
1425
1426 static void
1427 shrink_vec_var_access_impl(nir_function_impl *impl,
1428 struct hash_table *var_usage_map,
1429 nir_variable_mode modes)
1430 {
1431 nir_builder b;
1432 nir_builder_init(&b, impl);
1433
1434 nir_foreach_block(block, impl) {
1435 nir_foreach_instr_safe(instr, block) {
1436 switch (instr->type) {
1437 case nir_instr_type_deref: {
1438 nir_deref_instr *deref = nir_instr_as_deref(instr);
1439 if (!(deref->mode & modes))
1440 break;
1441
1442 /* Clean up any dead derefs we find lying around. They may refer
1443 * to variables we've deleted.
1444 */
1445 if (nir_deref_instr_remove_if_unused(deref))
1446 break;
1447
1448 /* Update the type in the deref to keep the types consistent as
1449 * you walk down the chain. We don't need to check if this is one
1450 * of the derefs we're shrinking because this is a no-op if it
1451 * isn't. The worst that could happen is that we accidentally fix
1452 * an invalid deref.
1453 */
1454 if (deref->deref_type == nir_deref_type_var) {
1455 deref->type = deref->var->type;
1456 } else if (deref->deref_type == nir_deref_type_array ||
1457 deref->deref_type == nir_deref_type_array_wildcard) {
1458 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1459 assert(glsl_type_is_array(parent->type) ||
1460 glsl_type_is_matrix(parent->type));
1461 deref->type = glsl_get_array_element(parent->type);
1462 }
1463 break;
1464 }
1465
1466 case nir_instr_type_intrinsic: {
1467 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1468
1469 /* If we have a copy whose source or destination has been deleted
1470 * because we determined the variable was dead, then we just
1471 * delete the copy instruction. If the source variable was dead
1472 * then it was writing undefined garbage anyway and if it's the
1473 * destination variable that's dead then the write isn't needed.
1474 */
1475 if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1476 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1477 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1478 if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1479 vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1480 nir_instr_remove(&intrin->instr);
1481 nir_deref_instr_remove_if_unused(dst);
1482 nir_deref_instr_remove_if_unused(src);
1483 }
1484 continue;
1485 }
1486
1487 if (intrin->intrinsic != nir_intrinsic_load_deref &&
1488 intrin->intrinsic != nir_intrinsic_store_deref)
1489 continue;
1490
1491 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1492 if (!(deref->mode & modes))
1493 continue;
1494
1495 struct vec_var_usage *usage =
1496 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1497 if (!usage)
1498 continue;
1499
1500 if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1501 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1502 nir_ssa_def *u =
1503 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
1504 intrin->dest.ssa.bit_size);
1505 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
1506 nir_src_for_ssa(u));
1507 }
1508 nir_instr_remove(&intrin->instr);
1509 nir_deref_instr_remove_if_unused(deref);
1510 continue;
1511 }
1512
1513 /* If we're not dropping any components, there's no need to
1514 * compact vectors.
1515 */
1516 if (usage->comps_kept == usage->all_comps)
1517 continue;
1518
1519 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1520 b.cursor = nir_after_instr(&intrin->instr);
1521
1522 nir_ssa_def *undef =
1523 nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
1524 nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1525 unsigned c = 0;
1526 for (unsigned i = 0; i < intrin->num_components; i++) {
1527 if (usage->comps_kept & (1u << i))
1528 vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
1529 else
1530 vec_srcs[i] = undef;
1531 }
1532 nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1533
1534 nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
1535 nir_src_for_ssa(vec),
1536 vec->parent_instr);
1537
1538 /* The SSA def is now only used by the swizzle. It's safe to
1539 * shrink the number of components.
1540 */
1541 assert(list_length(&intrin->dest.ssa.uses) == c);
1542 intrin->num_components = c;
1543 intrin->dest.ssa.num_components = c;
1544 } else {
1545 nir_component_mask_t write_mask =
1546 nir_intrinsic_write_mask(intrin);
1547
1548 unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1549 nir_component_mask_t new_write_mask = 0;
1550 unsigned c = 0;
1551 for (unsigned i = 0; i < intrin->num_components; i++) {
1552 if (usage->comps_kept & (1u << i)) {
1553 swizzle[c] = i;
1554 if (write_mask & (1u << i))
1555 new_write_mask |= 1u << c;
1556 c++;
1557 }
1558 }
1559
1560 b.cursor = nir_before_instr(&intrin->instr);
1561
1562 nir_ssa_def *swizzled =
1563 nir_swizzle(&b, intrin->src[1].ssa, swizzle, c);
1564
1565 /* Rewrite to use the compacted source */
1566 nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
1567 nir_src_for_ssa(swizzled));
1568 nir_intrinsic_set_write_mask(intrin, new_write_mask);
1569 intrin->num_components = c;
1570 }
1571 break;
1572 }
1573
1574 default:
1575 break;
1576 }
1577 }
1578 }
1579 }
1580
1581 static bool
1582 function_impl_has_vars_with_modes(nir_function_impl *impl,
1583 nir_variable_mode modes)
1584 {
1585 nir_shader *shader = impl->function->shader;
1586
1587 if ((modes & nir_var_shader_temp) && !exec_list_is_empty(&shader->globals))
1588 return true;
1589
1590 if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
1591 return true;
1592
1593 return false;
1594 }
1595
1596 /** Attempt to shrink arrays of vectors
1597 *
1598 * This pass looks at variables which contain a vector or an array (possibly
1599 * multiple dimensions) of vectors and attempts to lower to a smaller vector
1600 * or array. If the pass can prove that a component of a vector (or array of
1601 * vectors) is never really used, then that component will be removed.
1602 * Similarly, the pass attempts to shorten arrays based on what elements it
1603 * can prove are never read or never contain valid data.
1604 */
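/* As a rough illustration (hypothetical shader): "vec4 colors[8]" where only
 * the .xyz components are ever both written and read, and only elements 0..3
 * are ever accessed, can be shrunk to "vec3 colors[4]".  Loads and stores are
 * rewritten to the compacted layout and accesses to removed elements are
 * deleted (loads are replaced with undef).
 */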
1605 bool
1606 nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1607 {
1608 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
1609
1610 void *mem_ctx = ralloc_context(NULL);
1611
1612 struct hash_table *var_usage_map =
1613 _mesa_pointer_hash_table_create(mem_ctx);
1614
1615 bool has_vars_to_shrink = false;
1616 nir_foreach_function(function, shader) {
1617 if (!function->impl)
1618 continue;
1619
1620 /* Don't even bother crawling the IR if we don't have any variables.
1621 * Given that this pass deletes any unused variables, it's likely that
1622 * we will be in this scenario eventually.
1623 */
1624 if (function_impl_has_vars_with_modes(function->impl, modes)) {
1625 has_vars_to_shrink = true;
1626 find_used_components_impl(function->impl, var_usage_map,
1627 modes, mem_ctx);
1628 }
1629 }
1630 if (!has_vars_to_shrink) {
1631 ralloc_free(mem_ctx);
1632 return false;
1633 }
1634
1635 bool globals_shrunk = false;
1636 if (modes & nir_var_shader_temp)
1637 globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map);
1638
1639 bool progress = false;
1640 nir_foreach_function(function, shader) {
1641 if (!function->impl)
1642 continue;
1643
1644 bool locals_shrunk = false;
1645 if (modes & nir_var_function_temp) {
1646 locals_shrunk = shrink_vec_var_list(&function->impl->locals,
1647 var_usage_map);
1648 }
1649
1650 if (globals_shrunk || locals_shrunk) {
1651 shrink_vec_var_access_impl(function->impl, var_usage_map, modes);
1652
1653 nir_metadata_preserve(function->impl, nir_metadata_block_index |
1654 nir_metadata_dominance);
1655 progress = true;
1656 }
1657 }
1658
1659 ralloc_free(mem_ctx);
1660
1661 return progress;
1662 }