[mesa.git] / src / compiler / nir / nir_split_vars.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "nir_vla.h"
28
29 #include "util/set.h"
30 #include "util/u_math.h"
31
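/* Collects every variable whose var-deref has a complex use (as reported by
 * nir_deref_instr_has_complex_use); the splitting passes below leave such
 * variables alone.
 */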
32 static struct set *
33 get_complex_used_vars(nir_shader *shader, void *mem_ctx)
34 {
35 struct set *complex_vars = _mesa_pointer_set_create(mem_ctx);
36
37 nir_foreach_function(function, shader) {
38 if (!function->impl)
39 continue;
40
41 nir_foreach_block(block, function->impl) {
42 nir_foreach_instr(instr, block) {
43 if (instr->type != nir_instr_type_deref)
44 continue;
45
46 nir_deref_instr *deref = nir_instr_as_deref(instr);
47
48 /* We only need to consider var derefs because
49 * nir_deref_instr_has_complex_use is recursive.
50 */
51 if (deref->deref_type == nir_deref_type_var &&
52 nir_deref_instr_has_complex_use(deref))
53 _mesa_set_add(complex_vars, deref->var);
54 }
55 }
56 }
57
58 return complex_vars;
59 }
60
61 struct split_var_state {
62 void *mem_ctx;
63
64 nir_shader *shader;
65 nir_function_impl *impl;
66
67 nir_variable *base_var;
68 };
69
70 struct field {
71 struct field *parent;
72
73 const struct glsl_type *type;
74
75 unsigned num_fields;
76 struct field *fields;
77
78 nir_variable *var;
79 };
80
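/* Wraps "type" in the same array dimensions as "array_type".  For example
 * (illustrative only): wrapping a float in the dimensions of an int[3][4]
 * yields float[3][4].
 */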
81 static const struct glsl_type *
82 wrap_type_in_array(const struct glsl_type *type,
83 const struct glsl_type *array_type)
84 {
85 if (!glsl_type_is_array(array_type))
86 return type;
87
88 const struct glsl_type *elem_type =
89 wrap_type_in_array(type, glsl_get_array_element(array_type));
90 assert(glsl_get_explicit_stride(array_type) == 0);
91 return glsl_array_type(elem_type, glsl_get_length(array_type), 0);
92 }
93
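/* Returns how many array (or matrix) levels wrap a vector or scalar type,
 * or -1 if the innermost type is not a vector or scalar.  For example
 * (illustrative only): a float has 0 levels, a vec4[8] has 1, an array of
 * mat4 has 2 (the matrix counts as a level of its column vectors), and any
 * struct-containing type yields -1.
 */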
94 static int
95 num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
96 {
97 int num_levels = 0;
98 while (true) {
99 if (glsl_type_is_array_or_matrix(type)) {
100 num_levels++;
101 type = glsl_get_array_element(type);
102 } else if (glsl_type_is_vector_or_scalar(type)) {
103 return num_levels;
104 } else {
105 /* Not an array of vectors */
106 return -1;
107 }
108 }
109 }
110
111 static void
112 init_field_for_type(struct field *field, struct field *parent,
113 const struct glsl_type *type,
114 const char *name,
115 struct split_var_state *state)
116 {
117 *field = (struct field) {
118 .parent = parent,
119 .type = type,
120 };
121
122 const struct glsl_type *struct_type = glsl_without_array(type);
123 if (glsl_type_is_struct_or_ifc(struct_type)) {
124 field->num_fields = glsl_get_length(struct_type);
125 field->fields = ralloc_array(state->mem_ctx, struct field,
126 field->num_fields);
127 for (unsigned i = 0; i < field->num_fields; i++) {
128 char *field_name = NULL;
129 if (name) {
130 field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
131 glsl_get_struct_elem_name(struct_type, i));
132 } else {
133 field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
134 glsl_get_type_name(struct_type),
135 glsl_get_struct_elem_name(struct_type, i));
136 }
137 init_field_for_type(&field->fields[i], field,
138 glsl_get_struct_field(struct_type, i),
139 field_name, state);
140 }
141 } else {
142 const struct glsl_type *var_type = type;
143 for (struct field *f = field->parent; f; f = f->parent)
144 var_type = wrap_type_in_array(var_type, f->type);
145
146 nir_variable_mode mode = state->base_var->data.mode;
147 if (mode == nir_var_function_temp) {
148 field->var = nir_local_variable_create(state->impl, var_type, name);
149 } else {
150 field->var = nir_variable_create(state->shader, mode, var_type, name);
151 }
152 }
153 }
154
155 static bool
156 split_var_list_structs(nir_shader *shader,
157 nir_function_impl *impl,
158 struct exec_list *vars,
159 struct hash_table *var_field_map,
160 struct set **complex_vars,
161 void *mem_ctx)
162 {
163 struct split_var_state state = {
164 .mem_ctx = mem_ctx,
165 .shader = shader,
166 .impl = impl,
167 };
168
169 struct exec_list split_vars;
170 exec_list_make_empty(&split_vars);
171
172 /* To avoid list confusion (we'll be adding things as we split variables),
173 * pull all of the variables we plan to split off of the list
174 */
175 nir_foreach_variable_safe(var, vars) {
176 if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
177 continue;
178
179 if (*complex_vars == NULL)
180 *complex_vars = get_complex_used_vars(shader, mem_ctx);
181
182 /* We can't split a variable that's referenced with a deref that has any
183 * sort of complex usage.
184 */
185 if (_mesa_set_search(*complex_vars, var))
186 continue;
187
188 exec_node_remove(&var->node);
189 exec_list_push_tail(&split_vars, &var->node);
190 }
191
192 nir_foreach_variable(var, &split_vars) {
193 state.base_var = var;
194
195 struct field *root_field = ralloc(mem_ctx, struct field);
196 init_field_for_type(root_field, NULL, var->type, var->name, &state);
197 _mesa_hash_table_insert(var_field_map, var, root_field);
198 }
199
200 return !exec_list_is_empty(&split_vars);
201 }
202
203 static void
204 split_struct_derefs_impl(nir_function_impl *impl,
205 struct hash_table *var_field_map,
206 nir_variable_mode modes,
207 void *mem_ctx)
208 {
209 nir_builder b;
210 nir_builder_init(&b, impl);
211
212 nir_foreach_block(block, impl) {
213 nir_foreach_instr_safe(instr, block) {
214 if (instr->type != nir_instr_type_deref)
215 continue;
216
217 nir_deref_instr *deref = nir_instr_as_deref(instr);
218 if (!(deref->mode & modes))
219 continue;
220
221 /* Clean up any dead derefs we find lying around. They may refer to
222 * variables we're planning to split.
223 */
224 if (nir_deref_instr_remove_if_unused(deref))
225 continue;
226
227 if (!glsl_type_is_vector_or_scalar(deref->type))
228 continue;
229
230 nir_variable *base_var = nir_deref_instr_get_variable(deref);
231 struct hash_entry *entry =
232 _mesa_hash_table_search(var_field_map, base_var);
233 if (!entry)
234 continue;
235
236 struct field *root_field = entry->data;
237
238 nir_deref_path path;
239 nir_deref_path_init(&path, deref, mem_ctx);
240
241 struct field *tail_field = root_field;
242 for (unsigned i = 0; path.path[i]; i++) {
243 if (path.path[i]->deref_type != nir_deref_type_struct)
244 continue;
245
246 assert(i > 0);
247 assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type));
248 assert(path.path[i - 1]->type ==
249 glsl_without_array(tail_field->type));
250
251 tail_field = &tail_field->fields[path.path[i]->strct.index];
252 }
253 nir_variable *split_var = tail_field->var;
254
255 nir_deref_instr *new_deref = NULL;
256 for (unsigned i = 0; path.path[i]; i++) {
257 nir_deref_instr *p = path.path[i];
258 b.cursor = nir_after_instr(&p->instr);
259
260 switch (p->deref_type) {
261 case nir_deref_type_var:
262 assert(new_deref == NULL);
263 new_deref = nir_build_deref_var(&b, split_var);
264 break;
265
266 case nir_deref_type_array:
267 case nir_deref_type_array_wildcard:
268 new_deref = nir_build_deref_follower(&b, new_deref, p);
269 break;
270
271 case nir_deref_type_struct:
272 /* Nothing to do; we're splitting structs */
273 break;
274
275 default:
276 unreachable("Invalid deref type in path");
277 }
278 }
279
280 assert(new_deref->type == deref->type);
281 nir_ssa_def_rewrite_uses(&deref->dest.ssa,
282 nir_src_for_ssa(&new_deref->dest.ssa));
283 nir_deref_instr_remove_if_unused(deref);
284 }
285 }
286 }
287
288 /** A pass for splitting structs into multiple variables
289 *
290 * This pass splits arrays of structs into multiple variables, one for each
291 * (possibly nested) structure member. After this pass completes, no
292 * variables of the given mode will contain a struct type.
293 */
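/* For illustration only (GLSL-level sketch, not actual pass output): a
 * variable such as
 *
 *    struct { vec4 color; float alpha; } s[4];
 *
 * is replaced by two variables with the array wrapping preserved, roughly
 * "s_color" of type vec4[4] and "s_alpha" of type float[4], and every
 * struct deref on "s" is rewritten to a deref on the matching new variable.
 */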
294 bool
295 nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
296 {
297 void *mem_ctx = ralloc_context(NULL);
298 struct hash_table *var_field_map =
299 _mesa_pointer_hash_table_create(mem_ctx);
300 struct set *complex_vars = NULL;
301
302 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
303
304 bool has_global_splits = false;
305 if (modes & nir_var_shader_temp) {
306 has_global_splits = split_var_list_structs(shader, NULL,
307 &shader->globals,
308 var_field_map,
309 &complex_vars,
310 mem_ctx);
311 }
312
313 bool progress = false;
314 nir_foreach_function(function, shader) {
315 if (!function->impl)
316 continue;
317
318 bool has_local_splits = false;
319 if (modes & nir_var_function_temp) {
320 has_local_splits = split_var_list_structs(shader, function->impl,
321 &function->impl->locals,
322 var_field_map,
323 &complex_vars,
324 mem_ctx);
325 }
326
327 if (has_global_splits || has_local_splits) {
328 split_struct_derefs_impl(function->impl, var_field_map,
329 modes, mem_ctx);
330
331 nir_metadata_preserve(function->impl, nir_metadata_block_index |
332 nir_metadata_dominance);
333 progress = true;
334 } else {
335 nir_metadata_preserve(function->impl, nir_metadata_all);
336 }
337 }
338
339 ralloc_free(mem_ctx);
340
341 return progress;
342 }
343
344 struct array_level_info {
345 unsigned array_len;
346 bool split;
347 };
348
349 struct array_split {
350 /* Only set if this is the tail end of the splitting */
351 nir_variable *var;
352
353 unsigned num_splits;
354 struct array_split *splits;
355 };
356
357 struct array_var_info {
358 nir_variable *base_var;
359
360 const struct glsl_type *split_var_type;
361
362 bool split_var;
363 struct array_split root_split;
364
365 unsigned num_levels;
366 struct array_level_info levels[0];
367 };
368
369 static bool
370 init_var_list_array_infos(nir_shader *shader,
371 struct exec_list *vars,
372 struct hash_table *var_info_map,
373 struct set **complex_vars,
374 void *mem_ctx)
375 {
376 bool has_array = false;
377
378 nir_foreach_variable(var, vars) {
379 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
380 if (num_levels <= 0)
381 continue;
382
383 if (*complex_vars == NULL)
384 *complex_vars = get_complex_used_vars(shader, mem_ctx);
385
386 /* We can't split a variable that's referenced with a deref that has any
387 * sort of complex usage.
388 */
389 if (_mesa_set_search(*complex_vars, var))
390 continue;
391
392 struct array_var_info *info =
393 rzalloc_size(mem_ctx, sizeof(*info) +
394 num_levels * sizeof(info->levels[0]));
395
396 info->base_var = var;
397 info->num_levels = num_levels;
398
399 const struct glsl_type *type = var->type;
400 for (int i = 0; i < num_levels; i++) {
401 info->levels[i].array_len = glsl_get_length(type);
402 type = glsl_get_array_element(type);
403
404 /* All levels start out as split */
405 info->levels[i].split = true;
406 }
407
408 _mesa_hash_table_insert(var_info_map, var, info);
409 has_array = true;
410 }
411
412 return has_array;
413 }
414
415 static struct array_var_info *
416 get_array_var_info(nir_variable *var,
417 struct hash_table *var_info_map)
418 {
419 struct hash_entry *entry =
420 _mesa_hash_table_search(var_info_map, var);
421 return entry ? entry->data : NULL;
422 }
423
424 static struct array_var_info *
425 get_array_deref_info(nir_deref_instr *deref,
426 struct hash_table *var_info_map,
427 nir_variable_mode modes)
428 {
429 if (!(deref->mode & modes))
430 return NULL;
431
432 nir_variable *var = nir_deref_instr_get_variable(deref);
433 if (var == NULL)
434 return NULL;
435
436 return get_array_var_info(var, var_info_map);
437 }
438
439 static void
440 mark_array_deref_used(nir_deref_instr *deref,
441 struct hash_table *var_info_map,
442 nir_variable_mode modes,
443 void *mem_ctx)
444 {
445 struct array_var_info *info =
446 get_array_deref_info(deref, var_info_map, modes);
447 if (!info)
448 return;
449
450 nir_deref_path path;
451 nir_deref_path_init(&path, deref, mem_ctx);
452
453 /* Walk the path and look for indirects. If we have an array deref with an
454 * indirect, mark the given level as not being split.
455 */
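/* For example (illustrative only): for a deref chain equivalent to
 * "foo[2][i]" with a non-constant "i", the outer level stays split while
 * the inner level gets split = false and is kept as a real array.
 */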
456 for (unsigned i = 0; i < info->num_levels; i++) {
457 nir_deref_instr *p = path.path[i + 1];
458 if (p->deref_type == nir_deref_type_array &&
459 !nir_src_is_const(p->arr.index))
460 info->levels[i].split = false;
461 }
462 }
463
464 static void
465 mark_array_usage_impl(nir_function_impl *impl,
466 struct hash_table *var_info_map,
467 nir_variable_mode modes,
468 void *mem_ctx)
469 {
470 nir_foreach_block(block, impl) {
471 nir_foreach_instr(instr, block) {
472 if (instr->type != nir_instr_type_intrinsic)
473 continue;
474
475 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
476 switch (intrin->intrinsic) {
477 case nir_intrinsic_copy_deref:
478 mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
479 var_info_map, modes, mem_ctx);
480 /* Fall Through */
481
482 case nir_intrinsic_load_deref:
483 case nir_intrinsic_store_deref:
484 mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
485 var_info_map, modes, mem_ctx);
486 break;
487
488 default:
489 break;
490 }
491 }
492 }
493 }
494
495 static void
496 create_split_array_vars(struct array_var_info *var_info,
497 unsigned level,
498 struct array_split *split,
499 const char *name,
500 nir_shader *shader,
501 nir_function_impl *impl,
502 void *mem_ctx)
503 {
504 while (level < var_info->num_levels && !var_info->levels[level].split) {
505 name = ralloc_asprintf(mem_ctx, "%s[*]", name);
506 level++;
507 }
508
509 if (level == var_info->num_levels) {
510 /* We add parens to the variable name so it looks like "(foo[2][*])" so
511 * that further derefs will look like "(foo[2][*])[ssa_6]"
512 */
513 name = ralloc_asprintf(mem_ctx, "(%s)", name);
514
515 nir_variable_mode mode = var_info->base_var->data.mode;
516 if (mode == nir_var_function_temp) {
517 split->var = nir_local_variable_create(impl,
518 var_info->split_var_type, name);
519 } else {
520 split->var = nir_variable_create(shader, mode,
521 var_info->split_var_type, name);
522 }
523 } else {
524 assert(var_info->levels[level].split);
525 split->num_splits = var_info->levels[level].array_len;
526 split->splits = rzalloc_array(mem_ctx, struct array_split,
527 split->num_splits);
528 for (unsigned i = 0; i < split->num_splits; i++) {
529 create_split_array_vars(var_info, level + 1, &split->splits[i],
530 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
531 shader, impl, mem_ctx);
532 }
533 }
534 }
535
536 static bool
537 split_var_list_arrays(nir_shader *shader,
538 nir_function_impl *impl,
539 struct exec_list *vars,
540 struct hash_table *var_info_map,
541 void *mem_ctx)
542 {
543 struct exec_list split_vars;
544 exec_list_make_empty(&split_vars);
545
546 nir_foreach_variable_safe(var, vars) {
547 struct array_var_info *info = get_array_var_info(var, var_info_map);
548 if (!info)
549 continue;
550
551 bool has_split = false;
552 const struct glsl_type *split_type =
553 glsl_without_array_or_matrix(var->type);
554 for (int i = info->num_levels - 1; i >= 0; i--) {
555 if (info->levels[i].split) {
556 has_split = true;
557 continue;
558 }
559
560 /* If the original type was a matrix type, we'd like to keep that so
561 * we don't convert matrices into arrays.
562 */
563 if (i == info->num_levels - 1 &&
564 glsl_type_is_matrix(glsl_without_array(var->type))) {
565 split_type = glsl_matrix_type(glsl_get_base_type(split_type),
566 glsl_get_components(split_type),
567 info->levels[i].array_len);
568 } else {
569 split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
570 }
571 }
572
573 if (has_split) {
574 info->split_var_type = split_type;
575 /* To avoid list confusion (we'll be adding things as we split
576 * variables), pull all of the variables we plan to split off of the
577 * main variable list.
578 */
579 exec_node_remove(&var->node);
580 exec_list_push_tail(&split_vars, &var->node);
581 } else {
582 assert(split_type == glsl_get_bare_type(var->type));
583 /* If we're not modifying this variable, delete the info so we skip
584 * it faster in later passes.
585 */
586 _mesa_hash_table_remove_key(var_info_map, var);
587 }
588 }
589
590 nir_foreach_variable(var, &split_vars) {
591 struct array_var_info *info = get_array_var_info(var, var_info_map);
592 create_split_array_vars(info, 0, &info->root_split, var->name,
593 shader, impl, mem_ctx);
594 }
595
596 return !exec_list_is_empty(&split_vars);
597 }
598
599 static bool
600 deref_has_split_wildcard(nir_deref_path *path,
601 struct array_var_info *info)
602 {
603 if (info == NULL)
604 return false;
605
606 assert(path->path[0]->var == info->base_var);
607 for (unsigned i = 0; i < info->num_levels; i++) {
608 if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
609 info->levels[i].split)
610 return true;
611 }
612
613 return false;
614 }
615
616 static bool
617 array_path_is_out_of_bounds(nir_deref_path *path,
618 struct array_var_info *info)
619 {
620 if (info == NULL)
621 return false;
622
623 assert(path->path[0]->var == info->base_var);
624 for (unsigned i = 0; i < info->num_levels; i++) {
625 nir_deref_instr *p = path->path[i + 1];
626 if (p->deref_type == nir_deref_type_array_wildcard)
627 continue;
628
629 if (nir_src_is_const(p->arr.index) &&
630 nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
631 return true;
632 }
633
634 return false;
635 }
636
637 static void
638 emit_split_copies(nir_builder *b,
639 struct array_var_info *dst_info, nir_deref_path *dst_path,
640 unsigned dst_level, nir_deref_instr *dst,
641 struct array_var_info *src_info, nir_deref_path *src_path,
642 unsigned src_level, nir_deref_instr *src)
643 {
644 nir_deref_instr *dst_p, *src_p;
645
646 while ((dst_p = dst_path->path[dst_level + 1])) {
647 if (dst_p->deref_type == nir_deref_type_array_wildcard)
648 break;
649
650 dst = nir_build_deref_follower(b, dst, dst_p);
651 dst_level++;
652 }
653
654 while ((src_p = src_path->path[src_level + 1])) {
655 if (src_p->deref_type == nir_deref_type_array_wildcard)
656 break;
657
658 src = nir_build_deref_follower(b, src, src_p);
659 src_level++;
660 }
661
662 if (src_p == NULL || dst_p == NULL) {
663 assert(src_p == NULL && dst_p == NULL);
664 nir_copy_deref(b, dst, src);
665 } else {
666 assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
667 src_p->deref_type == nir_deref_type_array_wildcard);
668
669 if ((dst_info && dst_info->levels[dst_level].split) ||
670 (src_info && src_info->levels[src_level].split)) {
671 /* At least one of the source or the destination has no indirects at
672 * this level, so we lower the wildcard copy into per-element copies.
673 */
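/* For example (illustrative only): a "copy_deref b[*], a[*]" where "a" is
 * split at this level but "b" is not becomes a series of per-element
 * copies, "copy_deref b[0], a[0]", "copy_deref b[1], a[1]", and so on.
 */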
674 assert(glsl_get_length(dst_path->path[dst_level]->type) ==
675 glsl_get_length(src_path->path[src_level]->type));
676 unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
677 for (unsigned i = 0; i < len; i++) {
678 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
679 nir_build_deref_array_imm(b, dst, i),
680 src_info, src_path, src_level + 1,
681 nir_build_deref_array_imm(b, src, i));
682 }
683 } else {
684 /* Neither side is being split so we just keep going */
685 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
686 nir_build_deref_array_wildcard(b, dst),
687 src_info, src_path, src_level + 1,
688 nir_build_deref_array_wildcard(b, src));
689 }
690 }
691 }
692
693 static void
694 split_array_copies_impl(nir_function_impl *impl,
695 struct hash_table *var_info_map,
696 nir_variable_mode modes,
697 void *mem_ctx)
698 {
699 nir_builder b;
700 nir_builder_init(&b, impl);
701
702 nir_foreach_block(block, impl) {
703 nir_foreach_instr_safe(instr, block) {
704 if (instr->type != nir_instr_type_intrinsic)
705 continue;
706
707 nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
708 if (copy->intrinsic != nir_intrinsic_copy_deref)
709 continue;
710
711 nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
712 nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
713
714 struct array_var_info *dst_info =
715 get_array_deref_info(dst_deref, var_info_map, modes);
716 struct array_var_info *src_info =
717 get_array_deref_info(src_deref, var_info_map, modes);
718
719 if (!src_info && !dst_info)
720 continue;
721
722 nir_deref_path dst_path, src_path;
723 nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
724 nir_deref_path_init(&src_path, src_deref, mem_ctx);
725
726 if (!deref_has_split_wildcard(&dst_path, dst_info) &&
727 !deref_has_split_wildcard(&src_path, src_info))
728 continue;
729
730 b.cursor = nir_instr_remove(&copy->instr);
731
732 emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
733 src_info, &src_path, 0, src_path.path[0]);
734 }
735 }
736 }
737
738 static void
739 split_array_access_impl(nir_function_impl *impl,
740 struct hash_table *var_info_map,
741 nir_variable_mode modes,
742 void *mem_ctx)
743 {
744 nir_builder b;
745 nir_builder_init(&b, impl);
746
747 nir_foreach_block(block, impl) {
748 nir_foreach_instr_safe(instr, block) {
749 if (instr->type == nir_instr_type_deref) {
750 /* Clean up any dead derefs we find lying around. They may refer
751 * to variables we're planning to split.
752 */
753 nir_deref_instr *deref = nir_instr_as_deref(instr);
754 if (deref->mode & modes)
755 nir_deref_instr_remove_if_unused(deref);
756 continue;
757 }
758
759 if (instr->type != nir_instr_type_intrinsic)
760 continue;
761
762 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
763 if (intrin->intrinsic != nir_intrinsic_load_deref &&
764 intrin->intrinsic != nir_intrinsic_store_deref &&
765 intrin->intrinsic != nir_intrinsic_copy_deref)
766 continue;
767
768 const unsigned num_derefs =
769 intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
770
771 for (unsigned d = 0; d < num_derefs; d++) {
772 nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
773
774 struct array_var_info *info =
775 get_array_deref_info(deref, var_info_map, modes);
776 if (!info)
777 continue;
778
779 nir_deref_path path;
780 nir_deref_path_init(&path, deref, mem_ctx);
781
782 b.cursor = nir_before_instr(&intrin->instr);
783
784 if (array_path_is_out_of_bounds(&path, info)) {
785 /* If one of the derefs is out-of-bounds, we just delete the
786 * instruction. If a destination is out of bounds, then it may
787 * have been in-bounds prior to shrinking so we don't want to
788 * accidentally stomp something. However, we've already proven
789 * that it will never be read so it's safe to delete. If a
790 * source is out of bounds then it is loading random garbage.
791 * For loads, we replace their uses with an undef instruction
792 * and for copies we just delete the copy since it was writing
793 * undefined garbage anyway and we may as well leave the random
794 * garbage in the destination alone.
795 */
796 if (intrin->intrinsic == nir_intrinsic_load_deref) {
797 nir_ssa_def *u =
798 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
799 intrin->dest.ssa.bit_size);
800 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
801 nir_src_for_ssa(u));
802 }
803 nir_instr_remove(&intrin->instr);
804 for (unsigned i = 0; i < num_derefs; i++)
805 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
806 break;
807 }
808
809 struct array_split *split = &info->root_split;
810 for (unsigned i = 0; i < info->num_levels; i++) {
811 if (info->levels[i].split) {
812 nir_deref_instr *p = path.path[i + 1];
813 unsigned index = nir_src_as_uint(p->arr.index);
814 assert(index < info->levels[i].array_len);
815 split = &split->splits[index];
816 }
817 }
818 assert(!split->splits && split->var);
819
820 nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
821 for (unsigned i = 0; i < info->num_levels; i++) {
822 if (!info->levels[i].split) {
823 new_deref = nir_build_deref_follower(&b, new_deref,
824 path.path[i + 1]);
825 }
826 }
827 assert(new_deref->type == deref->type);
828
829 /* Rewrite the deref source to point to the split one */
830 nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
831 nir_src_for_ssa(&new_deref->dest.ssa));
832 nir_deref_instr_remove_if_unused(deref);
833 }
834 }
835 }
836 }
837
838 /** A pass for splitting arrays of vectors into multiple variables
839 *
840 * This pass looks at arrays (possibly multiple levels) of vectors (not
841 * structures or other types) and tries to split them into piles of variables,
842 * one for each array element. The heuristic used is simple: If a given array
843 * level is never used with an indirect, that array level will get split.
844 *
845 * This pass probably could handle structures easily enough, but making a
846 * pass that could see through an array of structures of arrays would be
847 * difficult, so it's best to just run nir_split_struct_vars first.
848 */
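/* For illustration only (GLSL-level sketch, not actual pass output): for
 *
 *    vec4 v[3][2];
 *
 * where the outer level is only ever indexed with constants but the inner
 * level sees an indirect, the pass creates three variables of type vec4[2]
 * (named roughly "(v[0][*])", "(v[1][*])", "(v[2][*])") and rewrites each
 * access to use the variable selected by the constant outer index.
 */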
849 bool
850 nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
851 {
852 void *mem_ctx = ralloc_context(NULL);
853 struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
854 struct set *complex_vars = NULL;
855
856 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
857
858 bool has_global_array = false;
859 if (modes & nir_var_shader_temp) {
860 has_global_array = init_var_list_array_infos(shader,
861 &shader->globals,
862 var_info_map,
863 &complex_vars,
864 mem_ctx);
865 }
866
867 bool has_any_array = false;
868 nir_foreach_function(function, shader) {
869 if (!function->impl)
870 continue;
871
872 bool has_local_array = false;
873 if (modes & nir_var_function_temp) {
874 has_local_array = init_var_list_array_infos(shader,
875 &function->impl->locals,
876 var_info_map,
877 &complex_vars,
878 mem_ctx);
879 }
880
881 if (has_global_array || has_local_array) {
882 has_any_array = true;
883 mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
884 }
885 }
886
887 /* If we failed to find any arrays of vectors, bail early. */
888 if (!has_any_array) {
889 ralloc_free(mem_ctx);
890 nir_shader_preserve_all_metadata(shader);
891 return false;
892 }
893
894 bool has_global_splits = false;
895 if (modes & nir_var_shader_temp) {
896 has_global_splits = split_var_list_arrays(shader, NULL,
897 &shader->globals,
898 var_info_map, mem_ctx);
899 }
900
901 bool progress = false;
902 nir_foreach_function(function, shader) {
903 if (!function->impl)
904 continue;
905
906 bool has_local_splits = false;
907 if (modes & nir_var_function_temp) {
908 has_local_splits = split_var_list_arrays(shader, function->impl,
909 &function->impl->locals,
910 var_info_map, mem_ctx);
911 }
912
913 if (has_global_splits || has_local_splits) {
914 split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
915 split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
916
917 nir_metadata_preserve(function->impl, nir_metadata_block_index |
918 nir_metadata_dominance);
919 progress = true;
920 } else {
921 nir_metadata_preserve(function->impl, nir_metadata_all);
922 }
923 }
924
925 ralloc_free(mem_ctx);
926
927 return progress;
928 }
929
930 struct array_level_usage {
931 unsigned array_len;
932
933 /* The value UINT_MAX will be used to indicate an indirect */
934 unsigned max_read;
935 unsigned max_written;
936
937 /* True if there is a copy that isn't to/from a shrinkable array */
938 bool has_external_copy;
939 struct set *levels_copied;
940 };
941
942 struct vec_var_usage {
943 /* Convenience set of all components this variable has */
944 nir_component_mask_t all_comps;
945
946 nir_component_mask_t comps_read;
947 nir_component_mask_t comps_written;
948
949 nir_component_mask_t comps_kept;
950
951 /* True if there is a copy that isn't to/from a shrinkable vector */
952 bool has_external_copy;
953 bool has_complex_use;
954 struct set *vars_copied;
955
956 unsigned num_levels;
957 struct array_level_usage levels[0];
958 };
959
960 static struct vec_var_usage *
961 get_vec_var_usage(nir_variable *var,
962 struct hash_table *var_usage_map,
963 bool add_usage_entry, void *mem_ctx)
964 {
965 struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
966 if (entry)
967 return entry->data;
968
969 if (!add_usage_entry)
970 return NULL;
971
972 /* Check to make sure that we are working with an array of vectors. We
973 * don't bother to shrink single vectors because we figure that we can
974 * clean it up better with SSA than by inserting piles of vecN instructions
975 * to compact results.
976 */
977 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
978 if (num_levels < 1)
979 return NULL; /* Not an array of vectors */
980
981 struct vec_var_usage *usage =
982 rzalloc_size(mem_ctx, sizeof(*usage) +
983 num_levels * sizeof(usage->levels[0]));
984
985 usage->num_levels = num_levels;
986 const struct glsl_type *type = var->type;
987 for (unsigned i = 0; i < num_levels; i++) {
988 usage->levels[i].array_len = glsl_get_length(type);
989 type = glsl_get_array_element(type);
990 }
991 assert(glsl_type_is_vector_or_scalar(type));
992
993 usage->all_comps = (1 << glsl_get_components(type)) - 1;
994
995 _mesa_hash_table_insert(var_usage_map, var, usage);
996
997 return usage;
998 }
999
1000 static struct vec_var_usage *
1001 get_vec_deref_usage(nir_deref_instr *deref,
1002 struct hash_table *var_usage_map,
1003 nir_variable_mode modes,
1004 bool add_usage_entry, void *mem_ctx)
1005 {
1006 if (!(deref->mode & modes))
1007 return NULL;
1008
1009 return get_vec_var_usage(nir_deref_instr_get_variable(deref),
1010 var_usage_map, add_usage_entry, mem_ctx);
1011 }
1012
1013 static void
1014 mark_deref_if_complex(nir_deref_instr *deref,
1015 struct hash_table *var_usage_map,
1016 nir_variable_mode modes,
1017 void *mem_ctx)
1018 {
1019 if (!(deref->mode & modes))
1020 return;
1021
1022 /* Only bother with var derefs because nir_deref_instr_has_complex_use is
1023 * recursive.
1024 */
1025 if (deref->deref_type != nir_deref_type_var)
1026 return;
1027
1028 if (!nir_deref_instr_has_complex_use(deref))
1029 return;
1030
1031 struct vec_var_usage *usage =
1032 get_vec_var_usage(deref->var, var_usage_map, true, mem_ctx);
1033 if (!usage)
1034 return;
1035
1036 usage->has_complex_use = true;
1037 }
1038
1039 static void
1040 mark_deref_used(nir_deref_instr *deref,
1041 nir_component_mask_t comps_read,
1042 nir_component_mask_t comps_written,
1043 nir_deref_instr *copy_deref,
1044 struct hash_table *var_usage_map,
1045 nir_variable_mode modes,
1046 void *mem_ctx)
1047 {
1048 if (!(deref->mode & modes))
1049 return;
1050
1051 nir_variable *var = nir_deref_instr_get_variable(deref);
1052 if (var == NULL)
1053 return;
1054
1055 struct vec_var_usage *usage =
1056 get_vec_var_usage(var, var_usage_map, true, mem_ctx);
1057 if (!usage)
1058 return;
1059
1060 usage->comps_read |= comps_read & usage->all_comps;
1061 usage->comps_written |= comps_written & usage->all_comps;
1062
1063 struct vec_var_usage *copy_usage = NULL;
1064 if (copy_deref) {
1065 copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
1066 true, mem_ctx);
1067 if (copy_usage) {
1068 if (usage->vars_copied == NULL) {
1069 usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
1070 }
1071 _mesa_set_add(usage->vars_copied, copy_usage);
1072 } else {
1073 usage->has_external_copy = true;
1074 }
1075 }
1076
1077 nir_deref_path path;
1078 nir_deref_path_init(&path, deref, mem_ctx);
1079
1080 nir_deref_path copy_path;
1081 if (copy_usage)
1082 nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
1083
1084 unsigned copy_i = 0;
1085 for (unsigned i = 0; i < usage->num_levels; i++) {
1086 struct array_level_usage *level = &usage->levels[i];
1087 nir_deref_instr *deref = path.path[i + 1];
1088 assert(deref->deref_type == nir_deref_type_array ||
1089 deref->deref_type == nir_deref_type_array_wildcard);
1090
1091 unsigned max_used;
1092 if (deref->deref_type == nir_deref_type_array) {
1093 max_used = nir_src_is_const(deref->arr.index) ?
1094 nir_src_as_uint(deref->arr.index) : UINT_MAX;
1095 } else {
1096 /* For wildcards, we read or wrote the whole thing. */
1097 assert(deref->deref_type == nir_deref_type_array_wildcard);
1098 max_used = level->array_len - 1;
1099
1100 if (copy_usage) {
1101 /* Match each wildcard level with the level on copy_usage */
1102 for (; copy_path.path[copy_i + 1]; copy_i++) {
1103 if (copy_path.path[copy_i + 1]->deref_type ==
1104 nir_deref_type_array_wildcard)
1105 break;
1106 }
1107 struct array_level_usage *copy_level =
1108 &copy_usage->levels[copy_i++];
1109
1110 if (level->levels_copied == NULL) {
1111 level->levels_copied = _mesa_pointer_set_create(mem_ctx);
1112 }
1113 _mesa_set_add(level->levels_copied, copy_level);
1114 } else {
1115 /* We have a wildcard and it comes from a variable we aren't
1116 * tracking; flag it and we'll know to not shorten this array.
1117 */
1118 level->has_external_copy = true;
1119 }
1120 }
1121
1122 if (comps_written)
1123 level->max_written = MAX2(level->max_written, max_used);
1124 if (comps_read)
1125 level->max_read = MAX2(level->max_read, max_used);
1126 }
1127 }
1128
1129 static bool
1130 src_is_load_deref(nir_src src, nir_src deref_src)
1131 {
1132 nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
1133 if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
1134 return false;
1135
1136 assert(load->src[0].is_ssa);
1137
1138 return load->src[0].ssa == deref_src.ssa;
1139 }
1140
1141 /* Returns all non-self-referential components of a store instruction. A
1142 * component is self-referential if it comes from the same component of a load
1143 * instruction on the same deref. If the only data in a particular component
1144 * of a variable came directly from that component then it's undefined. The
1145 * only way to get defined data into a component of a variable is for it to
1146 * get written there by something outside or from a different component.
1147 *
1148 * This is a fairly common pattern in shaders that come from either GLSL IR or
1149 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1150 * load-vec-store.
1151 */
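/* For example (illustrative NIR-like pseudocode):
 *
 *    vec4 %old = load_deref &v
 *    vec4 %new = vec4 %x, %old.y, %old.z, %old.w
 *    store_deref &v, %new, wrmask=xyzw
 *
 * only the x component receives data from outside "v", so only x is counted
 * as written here.
 */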
1152 static nir_component_mask_t
1153 get_non_self_referential_store_comps(nir_intrinsic_instr *store)
1154 {
1155 nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1156
1157 assert(store->src[1].is_ssa);
1158 nir_instr *src_instr = store->src[1].ssa->parent_instr;
1159 if (src_instr->type != nir_instr_type_alu)
1160 return comps;
1161
1162 nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1163
1164 if (src_alu->op == nir_op_mov) {
1165 /* If it's just a swizzle of a load from the same deref, discount any
1166 * channels that don't move in the swizzle.
1167 */
1168 if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1169 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1170 if (src_alu->src[0].swizzle[i] == i)
1171 comps &= ~(1u << i);
1172 }
1173 }
1174 } else if (nir_op_is_vec(src_alu->op)) {
1175 /* If it's a vec, discount any channels that are just loads from the
1176 * same deref put in the same spot.
1177 */
1178 for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1179 if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1180 src_alu->src[i].swizzle[0] == i)
1181 comps &= ~(1u << i);
1182 }
1183 }
1184
1185 return comps;
1186 }
1187
1188 static void
1189 find_used_components_impl(nir_function_impl *impl,
1190 struct hash_table *var_usage_map,
1191 nir_variable_mode modes,
1192 void *mem_ctx)
1193 {
1194 nir_foreach_block(block, impl) {
1195 nir_foreach_instr(instr, block) {
1196 if (instr->type == nir_instr_type_deref) {
1197 mark_deref_if_complex(nir_instr_as_deref(instr),
1198 var_usage_map, modes, mem_ctx);
1199 }
1200
1201 if (instr->type != nir_instr_type_intrinsic)
1202 continue;
1203
1204 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1205 switch (intrin->intrinsic) {
1206 case nir_intrinsic_load_deref:
1207 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1208 nir_ssa_def_components_read(&intrin->dest.ssa), 0,
1209 NULL, var_usage_map, modes, mem_ctx);
1210 break;
1211
1212 case nir_intrinsic_store_deref:
1213 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1214 0, get_non_self_referential_store_comps(intrin),
1215 NULL, var_usage_map, modes, mem_ctx);
1216 break;
1217
1218 case nir_intrinsic_copy_deref: {
1219 /* Just mark everything used for copies. */
1220 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1221 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1222 mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1223 mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1224 break;
1225 }
1226
1227 default:
1228 break;
1229 }
1230 }
1231 }
1232 }
1233
1234 static bool
1235 shrink_vec_var_list(struct exec_list *vars,
1236 struct hash_table *var_usage_map)
1237 {
1238 /* Initialize the components kept field of each variable. This is the
1239 * AND of the components written and components read. If a component is
1240 * written but never read, it's dead. If it is read but never written,
1241 * then all values read are undefined garbage and we may as well not read
1242 * them.
1243 *
1244 * The same logic applies to the array length. We make the array length
1245 * the minimum required length between read and write and plan to
1246 * discard any OOB access. The one exception here is indirect writes
1247 * because we don't know where they will land and we can't shrink an array
1248 * with indirect writes because previously in-bounds writes may become
1249 * out-of-bounds and have undefined behavior.
1250 *
1251 * Also, if we have a copy to/from something we can't shrink, we need
1252 * to leave components and array_len of any wildcards alone.
1253 */
1254 nir_foreach_variable(var, vars) {
1255 struct vec_var_usage *usage =
1256 get_vec_var_usage(var, var_usage_map, false, NULL);
1257 if (!usage)
1258 continue;
1259
1260 assert(usage->comps_kept == 0);
1261 if (usage->has_external_copy || usage->has_complex_use)
1262 usage->comps_kept = usage->all_comps;
1263 else
1264 usage->comps_kept = usage->comps_read & usage->comps_written;
1265
1266 for (unsigned i = 0; i < usage->num_levels; i++) {
1267 struct array_level_usage *level = &usage->levels[i];
1268 assert(level->array_len > 0);
1269
1270 if (level->max_written == UINT_MAX || level->has_external_copy ||
1271 usage->has_complex_use)
1272 continue; /* Can't shrink */
1273
1274 unsigned max_used = MIN2(level->max_read, level->max_written);
1275 level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1276 }
1277 }
1278
1279 /* In order for variable copies to work, we have to have the same data type
1280 * on the source and the destination. In order to satisfy this, we run a
1281 * little fixed-point algorithm to transitively ensure that we get enough
1282 * components and array elements for this to hold for all copies.
1283 */
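/* For example (illustrative only): if "a" would keep only .xy, "b" would
 * keep only .yz, and the shader contains "copy_deref a, b", the fixed-point
 * step below bumps both to .xyz so the copy stays type-consistent.
 */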
1284 bool fp_progress;
1285 do {
1286 fp_progress = false;
1287 nir_foreach_variable(var, vars) {
1288 struct vec_var_usage *var_usage =
1289 get_vec_var_usage(var, var_usage_map, false, NULL);
1290 if (!var_usage || !var_usage->vars_copied)
1291 continue;
1292
1293 set_foreach(var_usage->vars_copied, copy_entry) {
1294 struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1295 if (copy_usage->comps_kept != var_usage->comps_kept) {
1296 nir_component_mask_t comps_kept =
1297 (var_usage->comps_kept | copy_usage->comps_kept);
1298 var_usage->comps_kept = comps_kept;
1299 copy_usage->comps_kept = comps_kept;
1300 fp_progress = true;
1301 }
1302 }
1303
1304 for (unsigned i = 0; i < var_usage->num_levels; i++) {
1305 struct array_level_usage *var_level = &var_usage->levels[i];
1306 if (!var_level->levels_copied)
1307 continue;
1308
1309 set_foreach(var_level->levels_copied, copy_entry) {
1310 struct array_level_usage *copy_level = (void *)copy_entry->key;
1311 if (var_level->array_len != copy_level->array_len) {
1312 unsigned array_len =
1313 MAX2(var_level->array_len, copy_level->array_len);
1314 var_level->array_len = array_len;
1315 copy_level->array_len = array_len;
1316 fp_progress = true;
1317 }
1318 }
1319 }
1320 }
1321 } while (fp_progress);
1322
1323 bool vars_shrunk = false;
1324 nir_foreach_variable_safe(var, vars) {
1325 struct vec_var_usage *usage =
1326 get_vec_var_usage(var, var_usage_map, false, NULL);
1327 if (!usage)
1328 continue;
1329
1330 bool shrunk = false;
1331 const struct glsl_type *vec_type = var->type;
1332 for (unsigned i = 0; i < usage->num_levels; i++) {
1333 /* If we've reduced the array to zero elements at some level, just
1334 * set comps_kept to 0 and delete the variable.
1335 */
1336 if (usage->levels[i].array_len == 0) {
1337 usage->comps_kept = 0;
1338 break;
1339 }
1340
1341 assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1342 if (usage->levels[i].array_len < glsl_get_length(vec_type))
1343 shrunk = true;
1344 vec_type = glsl_get_array_element(vec_type);
1345 }
1346 assert(glsl_type_is_vector_or_scalar(vec_type));
1347
1348 assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1349 if (usage->comps_kept != usage->all_comps)
1350 shrunk = true;
1351
1352 if (usage->comps_kept == 0) {
1353 /* This variable is dead, remove it */
1354 vars_shrunk = true;
1355 exec_node_remove(&var->node);
1356 continue;
1357 }
1358
1359 if (!shrunk) {
1360 /* This variable doesn't need to be shrunk. Remove it from the
1361 * hash table so later steps will ignore it.
1362 */
1363 _mesa_hash_table_remove_key(var_usage_map, var);
1364 continue;
1365 }
1366
1367 /* Build the new var type */
1368 unsigned new_num_comps = util_bitcount(usage->comps_kept);
1369 const struct glsl_type *new_type =
1370 glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1371 for (int i = usage->num_levels - 1; i >= 0; i--) {
1372 assert(usage->levels[i].array_len > 0);
1373 /* If the original type was a matrix type, we'd like to keep that so
1374 * we don't convert matrices into arrays.
1375 */
1376 if (i == usage->num_levels - 1 &&
1377 glsl_type_is_matrix(glsl_without_array(var->type)) &&
1378 new_num_comps > 1 && usage->levels[i].array_len > 1) {
1379 new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1380 new_num_comps,
1381 usage->levels[i].array_len);
1382 } else {
1383 new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
1384 }
1385 }
1386 var->type = new_type;
1387
1388 vars_shrunk = true;
1389 }
1390
1391 return vars_shrunk;
1392 }
1393
1394 static bool
1395 vec_deref_is_oob(nir_deref_instr *deref,
1396 struct vec_var_usage *usage)
1397 {
1398 nir_deref_path path;
1399 nir_deref_path_init(&path, deref, NULL);
1400
1401 bool oob = false;
1402 for (unsigned i = 0; i < usage->num_levels; i++) {
1403 nir_deref_instr *p = path.path[i + 1];
1404 if (p->deref_type == nir_deref_type_array_wildcard)
1405 continue;
1406
1407 if (nir_src_is_const(p->arr.index) &&
1408 nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1409 oob = true;
1410 break;
1411 }
1412 }
1413
1414 nir_deref_path_finish(&path);
1415
1416 return oob;
1417 }
1418
1419 static bool
1420 vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1421 struct hash_table *var_usage_map,
1422 nir_variable_mode modes)
1423 {
1424 struct vec_var_usage *usage =
1425 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1426 if (!usage)
1427 return false;
1428
1429 return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1430 }
1431
1432 static void
1433 shrink_vec_var_access_impl(nir_function_impl *impl,
1434 struct hash_table *var_usage_map,
1435 nir_variable_mode modes)
1436 {
1437 nir_builder b;
1438 nir_builder_init(&b, impl);
1439
1440 nir_foreach_block(block, impl) {
1441 nir_foreach_instr_safe(instr, block) {
1442 switch (instr->type) {
1443 case nir_instr_type_deref: {
1444 nir_deref_instr *deref = nir_instr_as_deref(instr);
1445 if (!(deref->mode & modes))
1446 break;
1447
1448 /* Clean up any dead derefs we find lying around. They may refer
1449 * to variables we've deleted.
1450 */
1451 if (nir_deref_instr_remove_if_unused(deref))
1452 break;
1453
1454 /* Update the type in the deref to keep the types consistent as
1455 * you walk down the chain. We don't need to check if this is one
1456 * of the derefs we're shrinking because this is a no-op if it
1457 * isn't. The worst that could happen is that we accidentally fix
1458 * an invalid deref.
1459 */
1460 if (deref->deref_type == nir_deref_type_var) {
1461 deref->type = deref->var->type;
1462 } else if (deref->deref_type == nir_deref_type_array ||
1463 deref->deref_type == nir_deref_type_array_wildcard) {
1464 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1465 assert(glsl_type_is_array(parent->type) ||
1466 glsl_type_is_matrix(parent->type));
1467 deref->type = glsl_get_array_element(parent->type);
1468 }
1469 break;
1470 }
1471
1472 case nir_instr_type_intrinsic: {
1473 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1474
1475 /* If we have a copy whose source or destination has been deleted
1476 * because we determined the variable was dead, then we just
1477 * delete the copy instruction. If the source variable was dead
1478 * then it was writing undefined garbage anyway and if it's the
1479 * destination variable that's dead then the write isn't needed.
1480 */
1481 if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1482 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1483 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1484 if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1485 vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1486 nir_instr_remove(&intrin->instr);
1487 nir_deref_instr_remove_if_unused(dst);
1488 nir_deref_instr_remove_if_unused(src);
1489 }
1490 continue;
1491 }
1492
1493 if (intrin->intrinsic != nir_intrinsic_load_deref &&
1494 intrin->intrinsic != nir_intrinsic_store_deref)
1495 continue;
1496
1497 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1498 if (!(deref->mode & modes))
1499 continue;
1500
1501 struct vec_var_usage *usage =
1502 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1503 if (!usage)
1504 continue;
1505
1506 if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1507 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1508 nir_ssa_def *u =
1509 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
1510 intrin->dest.ssa.bit_size);
1511 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
1512 nir_src_for_ssa(u));
1513 }
1514 nir_instr_remove(&intrin->instr);
1515 nir_deref_instr_remove_if_unused(deref);
1516 continue;
1517 }
1518
1519 /* If we're not dropping any components, there's no need to
1520 * compact vectors.
1521 */
1522 if (usage->comps_kept == usage->all_comps)
1523 continue;
1524
1525 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1526 b.cursor = nir_after_instr(&intrin->instr);
1527
1528 nir_ssa_def *undef =
1529 nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
1530 nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1531 unsigned c = 0;
1532 for (unsigned i = 0; i < intrin->num_components; i++) {
1533 if (usage->comps_kept & (1u << i))
1534 vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
1535 else
1536 vec_srcs[i] = undef;
1537 }
1538 nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1539
1540 nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
1541 nir_src_for_ssa(vec),
1542 vec->parent_instr);
1543
1544 /* The SSA def is now only used by the swizzle. It's safe to
1545 * shrink the number of components.
1546 */
1547 assert(list_length(&intrin->dest.ssa.uses) == c);
1548 intrin->num_components = c;
1549 intrin->dest.ssa.num_components = c;
1550 } else {
1551 nir_component_mask_t write_mask =
1552 nir_intrinsic_write_mask(intrin);
1553
1554 unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1555 nir_component_mask_t new_write_mask = 0;
1556 unsigned c = 0;
1557 for (unsigned i = 0; i < intrin->num_components; i++) {
1558 if (usage->comps_kept & (1u << i)) {
1559 swizzle[c] = i;
1560 if (write_mask & (1u << i))
1561 new_write_mask |= 1u << c;
1562 c++;
1563 }
1564 }
1565
1566 b.cursor = nir_before_instr(&intrin->instr);
1567
1568 nir_ssa_def *swizzled =
1569 nir_swizzle(&b, intrin->src[1].ssa, swizzle, c);
1570
1571 /* Rewrite to use the compacted source */
1572 nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
1573 nir_src_for_ssa(swizzled));
1574 nir_intrinsic_set_write_mask(intrin, new_write_mask);
1575 intrin->num_components = c;
1576 }
1577 break;
1578 }
1579
1580 default:
1581 break;
1582 }
1583 }
1584 }
1585 }
1586
1587 static bool
1588 function_impl_has_vars_with_modes(nir_function_impl *impl,
1589 nir_variable_mode modes)
1590 {
1591 nir_shader *shader = impl->function->shader;
1592
1593 if ((modes & nir_var_shader_temp) && !exec_list_is_empty(&shader->globals))
1594 return true;
1595
1596 if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
1597 return true;
1598
1599 return false;
1600 }
1601
1602 /** Attempt to shrink arrays of vectors
1603 *
1604 * This pass looks at variables which contain a vector or an array (possibly
1605 * multiple dimensions) of vectors and attempts to lower it to a smaller vector
1606 * or array. If the pass can prove that a component of a vector (or array of
1607 * vectors) is never really used, then that component will be removed.
1608 * Similarly, the pass attempts to shorten arrays based on what elements it
1609 * can prove are never read or never contain valid data.
1610 */
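/* For illustration only (GLSL-level sketch, not actual pass output): if a
 * shader declares
 *
 *    vec4 t[8];
 *
 * but only the .xy components are ever both written and read, and no
 * element past t[3] is ever written, the variable becomes vec2 t[4];
 * loads and stores are re-swizzled to the packed components and accesses
 * proven out of bounds are removed (loads are replaced with undef).
 */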
1611 bool
1612 nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1613 {
1614 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
1615
1616 void *mem_ctx = ralloc_context(NULL);
1617
1618 struct hash_table *var_usage_map =
1619 _mesa_pointer_hash_table_create(mem_ctx);
1620
1621 bool has_vars_to_shrink = false;
1622 nir_foreach_function(function, shader) {
1623 if (!function->impl)
1624 continue;
1625
1626 /* Don't even bother crawling the IR if we don't have any variables.
1627 * Given that this pass deletes any unused variables, it's likely that
1628 * we will be in this scenario eventually.
1629 */
1630 if (function_impl_has_vars_with_modes(function->impl, modes)) {
1631 has_vars_to_shrink = true;
1632 find_used_components_impl(function->impl, var_usage_map,
1633 modes, mem_ctx);
1634 }
1635 }
1636 if (!has_vars_to_shrink) {
1637 ralloc_free(mem_ctx);
1638 nir_shader_preserve_all_metadata(shader);
1639 return false;
1640 }
1641
1642 bool globals_shrunk = false;
1643 if (modes & nir_var_shader_temp)
1644 globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map);
1645
1646 bool progress = false;
1647 nir_foreach_function(function, shader) {
1648 if (!function->impl)
1649 continue;
1650
1651 bool locals_shrunk = false;
1652 if (modes & nir_var_function_temp) {
1653 locals_shrunk = shrink_vec_var_list(&function->impl->locals,
1654 var_usage_map);
1655 }
1656
1657 if (globals_shrunk || locals_shrunk) {
1658 shrink_vec_var_access_impl(function->impl, var_usage_map, modes);
1659
1660 nir_metadata_preserve(function->impl, nir_metadata_block_index |
1661 nir_metadata_dominance);
1662 progress = true;
1663 } else {
1664 nir_metadata_preserve(function->impl, nir_metadata_all);
1665 }
1666 }
1667
1668 ralloc_free(mem_ctx);
1669
1670 return progress;
1671 }