nir/split_vars: Add mode checks to list walks
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "nir_vla.h"
28
29 #include "util/set.h"
30 #include "util/u_math.h"
31
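/* Collects every variable that is referenced through a deref chain with a
 * "complex" use -- roughly, any use other than further struct/array derefs
 * and plain load/store/copy intrinsics (see nir_deref_instr_has_complex_use).
 * The splitting and shrinking passes below leave such variables alone.
 */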
32 static struct set *
33 get_complex_used_vars(nir_shader *shader, void *mem_ctx)
34 {
35 struct set *complex_vars = _mesa_pointer_set_create(mem_ctx);
36
37 nir_foreach_function(function, shader) {
38 if (!function->impl)
39 continue;
40
41 nir_foreach_block(block, function->impl) {
42 nir_foreach_instr(instr, block) {
43 if (instr->type != nir_instr_type_deref)
44 continue;
45
46 nir_deref_instr *deref = nir_instr_as_deref(instr);
47
48 /* We only need to consider var derefs because
49 * nir_deref_instr_has_complex_use is recursive.
50 */
51 if (deref->deref_type == nir_deref_type_var &&
52 nir_deref_instr_has_complex_use(deref))
53 _mesa_set_add(complex_vars, deref->var);
54 }
55 }
56 }
57
58 return complex_vars;
59 }
60
61 struct split_var_state {
62 void *mem_ctx;
63
64 nir_shader *shader;
65 nir_function_impl *impl;
66
67 nir_variable *base_var;
68 };
69
70 struct field {
71 struct field *parent;
72
73 const struct glsl_type *type;
74
75 unsigned num_fields;
76 struct field *fields;
77
78 nir_variable *var;
79 };
80
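/* Wraps "type" in the array dimensions of "array_type".  As an illustration,
 * wrapping a float in the dimensions of vec4[3][2] yields float[3][2]; if
 * "array_type" is not an array, "type" is returned unchanged.
 */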
81 static const struct glsl_type *
82 wrap_type_in_array(const struct glsl_type *type,
83 const struct glsl_type *array_type)
84 {
85 if (!glsl_type_is_array(array_type))
86 return type;
87
88 const struct glsl_type *elem_type =
89 wrap_type_in_array(type, glsl_get_array_element(array_type));
90 assert(glsl_get_explicit_stride(array_type) == 0);
91 return glsl_array_type(elem_type, glsl_get_length(array_type), 0);
92 }
93
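/* Counts the array (or matrix) levels wrapped around a vector or scalar
 * type: vec4[4][2] has two levels, mat3 has one (a matrix is treated as an
 * array of its columns), and a plain float has zero.  Returns -1 for
 * anything that doesn't bottom out in a vector or scalar, such as an array
 * of structs.
 */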
94 static int
95 num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
96 {
97 int num_levels = 0;
98 while (true) {
99 if (glsl_type_is_array_or_matrix(type)) {
100 num_levels++;
101 type = glsl_get_array_element(type);
102 } else if (glsl_type_is_vector_or_scalar(type)) {
103 return num_levels;
104 } else {
105 /* Not an array of vectors */
106 return -1;
107 }
108 }
109 }
110
111 static void
112 init_field_for_type(struct field *field, struct field *parent,
113 const struct glsl_type *type,
114 const char *name,
115 struct split_var_state *state)
116 {
117 *field = (struct field) {
118 .parent = parent,
119 .type = type,
120 };
121
122 const struct glsl_type *struct_type = glsl_without_array(type);
123 if (glsl_type_is_struct_or_ifc(struct_type)) {
124 field->num_fields = glsl_get_length(struct_type);
125 field->fields = ralloc_array(state->mem_ctx, struct field,
126 field->num_fields);
127 for (unsigned i = 0; i < field->num_fields; i++) {
128 char *field_name = NULL;
129 if (name) {
130 field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
131 glsl_get_struct_elem_name(struct_type, i));
132 } else {
133 field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
134 glsl_get_type_name(struct_type),
135 glsl_get_struct_elem_name(struct_type, i));
136 }
137 init_field_for_type(&field->fields[i], field,
138 glsl_get_struct_field(struct_type, i),
139 field_name, state);
140 }
141 } else {
142 const struct glsl_type *var_type = type;
143 for (struct field *f = field->parent; f; f = f->parent)
144 var_type = wrap_type_in_array(var_type, f->type);
145
146 nir_variable_mode mode = state->base_var->data.mode;
147 if (mode == nir_var_function_temp) {
148 field->var = nir_local_variable_create(state->impl, var_type, name);
149 } else {
150 field->var = nir_variable_create(state->shader, mode, var_type, name);
151 }
152 }
153 }
154
155 static bool
156 split_var_list_structs(nir_shader *shader,
157 nir_function_impl *impl,
158 struct exec_list *vars,
159 nir_variable_mode mode,
160 struct hash_table *var_field_map,
161 struct set **complex_vars,
162 void *mem_ctx)
163 {
164 struct split_var_state state = {
165 .mem_ctx = mem_ctx,
166 .shader = shader,
167 .impl = impl,
168 };
169
170 struct exec_list split_vars;
171 exec_list_make_empty(&split_vars);
172
173 /* To avoid list confusion (we'll be adding things as we split variables),
174 * pull all of the variables we plan to split off of the list
175 */
176 nir_foreach_variable_safe(var, vars) {
177 if (var->data.mode != mode)
178 continue;
179
180 if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
181 continue;
182
183 if (*complex_vars == NULL)
184 *complex_vars = get_complex_used_vars(shader, mem_ctx);
185
186 /* We can't split a variable that's referenced by a deref that has any
187 * sort of complex usage.
188 */
189 if (_mesa_set_search(*complex_vars, var))
190 continue;
191
192 exec_node_remove(&var->node);
193 exec_list_push_tail(&split_vars, &var->node);
194 }
195
196 nir_foreach_variable(var, &split_vars) {
197 state.base_var = var;
198
199 struct field *root_field = ralloc(mem_ctx, struct field);
200 init_field_for_type(root_field, NULL, var->type, var->name, &state);
201 _mesa_hash_table_insert(var_field_map, var, root_field);
202 }
203
204 return !exec_list_is_empty(&split_vars);
205 }
206
207 static void
208 split_struct_derefs_impl(nir_function_impl *impl,
209 struct hash_table *var_field_map,
210 nir_variable_mode modes,
211 void *mem_ctx)
212 {
213 nir_builder b;
214 nir_builder_init(&b, impl);
215
216 nir_foreach_block(block, impl) {
217 nir_foreach_instr_safe(instr, block) {
218 if (instr->type != nir_instr_type_deref)
219 continue;
220
221 nir_deref_instr *deref = nir_instr_as_deref(instr);
222 if (!(deref->mode & modes))
223 continue;
224
225 /* Clean up any dead derefs we find lying around. They may refer to
226 * variables we're planning to split.
227 */
228 if (nir_deref_instr_remove_if_unused(deref))
229 continue;
230
231 if (!glsl_type_is_vector_or_scalar(deref->type))
232 continue;
233
234 nir_variable *base_var = nir_deref_instr_get_variable(deref);
235 struct hash_entry *entry =
236 _mesa_hash_table_search(var_field_map, base_var);
237 if (!entry)
238 continue;
239
240 struct field *root_field = entry->data;
241
242 nir_deref_path path;
243 nir_deref_path_init(&path, deref, mem_ctx);
244
245 struct field *tail_field = root_field;
246 for (unsigned i = 0; path.path[i]; i++) {
247 if (path.path[i]->deref_type != nir_deref_type_struct)
248 continue;
249
250 assert(i > 0);
251 assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type));
252 assert(path.path[i - 1]->type ==
253 glsl_without_array(tail_field->type));
254
255 tail_field = &tail_field->fields[path.path[i]->strct.index];
256 }
257 nir_variable *split_var = tail_field->var;
258
259 nir_deref_instr *new_deref = NULL;
260 for (unsigned i = 0; path.path[i]; i++) {
261 nir_deref_instr *p = path.path[i];
262 b.cursor = nir_after_instr(&p->instr);
263
264 switch (p->deref_type) {
265 case nir_deref_type_var:
266 assert(new_deref == NULL);
267 new_deref = nir_build_deref_var(&b, split_var);
268 break;
269
270 case nir_deref_type_array:
271 case nir_deref_type_array_wildcard:
272 new_deref = nir_build_deref_follower(&b, new_deref, p);
273 break;
274
275 case nir_deref_type_struct:
276 /* Nothing to do; we're splitting structs */
277 break;
278
279 default:
280 unreachable("Invalid deref type in path");
281 }
282 }
283
284 assert(new_deref->type == deref->type);
285 nir_ssa_def_rewrite_uses(&deref->dest.ssa,
286 nir_src_for_ssa(&new_deref->dest.ssa));
287 nir_deref_instr_remove_if_unused(deref);
288 }
289 }
290 }
291
292 /** A pass for splitting structs into multiple variables
293 *
294 * This pass splits arrays of structs into multiple variables, one for each
295 * (possibly nested) structure member. After this pass completes, no
296 * variables of the given mode will contain a struct type.
297 */
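/* As an illustrative sketch (in GLSL-style notation rather than actual NIR),
 * a variable
 *
 *    struct S { float a; vec2 b[3]; };
 *    S s[4];
 *
 * is replaced by one variable per leaf member, with the enclosing array
 * dimensions re-wrapped around each leaf:
 *
 *    float s_a[4];
 *    vec2 s_b[4][3];
 *
 * and every struct deref on "s" is rewritten to deref the matching split
 * variable directly.
 */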
298 bool
299 nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
300 {
301 void *mem_ctx = ralloc_context(NULL);
302 struct hash_table *var_field_map =
303 _mesa_pointer_hash_table_create(mem_ctx);
304 struct set *complex_vars = NULL;
305
306 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
307
308 bool has_global_splits = false;
309 if (modes & nir_var_shader_temp) {
310 has_global_splits = split_var_list_structs(shader, NULL,
311 &shader->globals,
312 nir_var_shader_temp,
313 var_field_map,
314 &complex_vars,
315 mem_ctx);
316 }
317
318 bool progress = false;
319 nir_foreach_function(function, shader) {
320 if (!function->impl)
321 continue;
322
323 bool has_local_splits = false;
324 if (modes & nir_var_function_temp) {
325 has_local_splits = split_var_list_structs(shader, function->impl,
326 &function->impl->locals,
327 nir_var_function_temp,
328 var_field_map,
329 &complex_vars,
330 mem_ctx);
331 }
332
333 if (has_global_splits || has_local_splits) {
334 split_struct_derefs_impl(function->impl, var_field_map,
335 modes, mem_ctx);
336
337 nir_metadata_preserve(function->impl, nir_metadata_block_index |
338 nir_metadata_dominance);
339 progress = true;
340 } else {
341 nir_metadata_preserve(function->impl, nir_metadata_all);
342 }
343 }
344
345 ralloc_free(mem_ctx);
346
347 return progress;
348 }
349
350 struct array_level_info {
351 unsigned array_len;
352 bool split;
353 };
354
355 struct array_split {
356 /* Only set if this is the tail end of the splitting */
357 nir_variable *var;
358
359 unsigned num_splits;
360 struct array_split *splits;
361 };
362
363 struct array_var_info {
364 nir_variable *base_var;
365
366 const struct glsl_type *split_var_type;
367
368 bool split_var;
369 struct array_split root_split;
370
371 unsigned num_levels;
372 struct array_level_info levels[0];
373 };
374
375 static bool
376 init_var_list_array_infos(nir_shader *shader,
377 struct exec_list *vars,
378 nir_variable_mode mode,
379 struct hash_table *var_info_map,
380 struct set **complex_vars,
381 void *mem_ctx)
382 {
383 bool has_array = false;
384
385 nir_foreach_variable(var, vars) {
386 if (var->data.mode != mode)
387 continue;
388
389 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
390 if (num_levels <= 0)
391 continue;
392
393 if (*complex_vars == NULL)
394 *complex_vars = get_complex_used_vars(shader, mem_ctx);
395
396 /* We can't split a variable that's referenced by a deref that has any
397 * sort of complex usage.
398 */
399 if (_mesa_set_search(*complex_vars, var))
400 continue;
401
402 struct array_var_info *info =
403 rzalloc_size(mem_ctx, sizeof(*info) +
404 num_levels * sizeof(info->levels[0]));
405
406 info->base_var = var;
407 info->num_levels = num_levels;
408
409 const struct glsl_type *type = var->type;
410 for (int i = 0; i < num_levels; i++) {
411 info->levels[i].array_len = glsl_get_length(type);
412 type = glsl_get_array_element(type);
413
414 /* All levels start out as split */
415 info->levels[i].split = true;
416 }
417
418 _mesa_hash_table_insert(var_info_map, var, info);
419 has_array = true;
420 }
421
422 return has_array;
423 }
424
425 static struct array_var_info *
426 get_array_var_info(nir_variable *var,
427 struct hash_table *var_info_map)
428 {
429 struct hash_entry *entry =
430 _mesa_hash_table_search(var_info_map, var);
431 return entry ? entry->data : NULL;
432 }
433
434 static struct array_var_info *
435 get_array_deref_info(nir_deref_instr *deref,
436 struct hash_table *var_info_map,
437 nir_variable_mode modes)
438 {
439 if (!(deref->mode & modes))
440 return NULL;
441
442 nir_variable *var = nir_deref_instr_get_variable(deref);
443 if (var == NULL)
444 return NULL;
445
446 return get_array_var_info(var, var_info_map);
447 }
448
449 static void
450 mark_array_deref_used(nir_deref_instr *deref,
451 struct hash_table *var_info_map,
452 nir_variable_mode modes,
453 void *mem_ctx)
454 {
455 struct array_var_info *info =
456 get_array_deref_info(deref, var_info_map, modes);
457 if (!info)
458 return;
459
460 nir_deref_path path;
461 nir_deref_path_init(&path, deref, mem_ctx);
462
463 /* Walk the path and look for indirects. If we have an array deref with an
464 * indirect, mark the given level as not being split.
465 */
466 for (unsigned i = 0; i < info->num_levels; i++) {
467 nir_deref_instr *p = path.path[i + 1];
468 if (p->deref_type == nir_deref_type_array &&
469 !nir_src_is_const(p->arr.index))
470 info->levels[i].split = false;
471 }
472 }
473
474 static void
475 mark_array_usage_impl(nir_function_impl *impl,
476 struct hash_table *var_info_map,
477 nir_variable_mode modes,
478 void *mem_ctx)
479 {
480 nir_foreach_block(block, impl) {
481 nir_foreach_instr(instr, block) {
482 if (instr->type != nir_instr_type_intrinsic)
483 continue;
484
485 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
486 switch (intrin->intrinsic) {
487 case nir_intrinsic_copy_deref:
488 mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
489 var_info_map, modes, mem_ctx);
490 /* Fall Through */
491
492 case nir_intrinsic_load_deref:
493 case nir_intrinsic_store_deref:
494 mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
495 var_info_map, modes, mem_ctx);
496 break;
497
498 default:
499 break;
500 }
501 }
502 }
503 }
504
505 static void
506 create_split_array_vars(struct array_var_info *var_info,
507 unsigned level,
508 struct array_split *split,
509 const char *name,
510 nir_shader *shader,
511 nir_function_impl *impl,
512 void *mem_ctx)
513 {
514 while (level < var_info->num_levels && !var_info->levels[level].split) {
515 name = ralloc_asprintf(mem_ctx, "%s[*]", name);
516 level++;
517 }
518
519 if (level == var_info->num_levels) {
520 /* We add parens to the variable name so it looks like "(foo[2][*])" so
521 * that further derefs will look like "(foo[2][*])[ssa_6]"
522 */
523 name = ralloc_asprintf(mem_ctx, "(%s)", name);
524
525 nir_variable_mode mode = var_info->base_var->data.mode;
526 if (mode == nir_var_function_temp) {
527 split->var = nir_local_variable_create(impl,
528 var_info->split_var_type, name);
529 } else {
530 split->var = nir_variable_create(shader, mode,
531 var_info->split_var_type, name);
532 }
533 } else {
534 assert(var_info->levels[level].split);
535 split->num_splits = var_info->levels[level].array_len;
536 split->splits = rzalloc_array(mem_ctx, struct array_split,
537 split->num_splits);
538 for (unsigned i = 0; i < split->num_splits; i++) {
539 create_split_array_vars(var_info, level + 1, &split->splits[i],
540 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
541 shader, impl, mem_ctx);
542 }
543 }
544 }
545
546 static bool
547 split_var_list_arrays(nir_shader *shader,
548 nir_function_impl *impl,
549 struct exec_list *vars,
550 nir_variable_mode mode,
551 struct hash_table *var_info_map,
552 void *mem_ctx)
553 {
554 struct exec_list split_vars;
555 exec_list_make_empty(&split_vars);
556
557 nir_foreach_variable_safe(var, vars) {
558 if (var->data.mode != mode)
559 continue;
560
561 struct array_var_info *info = get_array_var_info(var, var_info_map);
562 if (!info)
563 continue;
564
565 bool has_split = false;
566 const struct glsl_type *split_type =
567 glsl_without_array_or_matrix(var->type);
568 for (int i = info->num_levels - 1; i >= 0; i--) {
569 if (info->levels[i].split) {
570 has_split = true;
571 continue;
572 }
573
574 /* If the original type was a matrix type, we'd like to keep that so
575 * we don't convert matrices into arrays.
576 */
577 if (i == info->num_levels - 1 &&
578 glsl_type_is_matrix(glsl_without_array(var->type))) {
579 split_type = glsl_matrix_type(glsl_get_base_type(split_type),
580 glsl_get_components(split_type),
581 info->levels[i].array_len);
582 } else {
583 split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
584 }
585 }
586
587 if (has_split) {
588 info->split_var_type = split_type;
589 /* To avoid list confusion (we'll be adding things as we split
590 * variables), pull all of the variables we plan to split off of the
591 * main variable list.
592 */
593 exec_node_remove(&var->node);
594 exec_list_push_tail(&split_vars, &var->node);
595 } else {
596 assert(split_type == glsl_get_bare_type(var->type));
597 /* If we're not modifying this variable, delete the info so we skip
598 * it faster in later passes.
599 */
600 _mesa_hash_table_remove_key(var_info_map, var);
601 }
602 }
603
604 nir_foreach_variable(var, &split_vars) {
605 struct array_var_info *info = get_array_var_info(var, var_info_map);
606 create_split_array_vars(info, 0, &info->root_split, var->name,
607 shader, impl, mem_ctx);
608 }
609
610 return !exec_list_is_empty(&split_vars);
611 }
612
613 static bool
614 deref_has_split_wildcard(nir_deref_path *path,
615 struct array_var_info *info)
616 {
617 if (info == NULL)
618 return false;
619
620 assert(path->path[0]->var == info->base_var);
621 for (unsigned i = 0; i < info->num_levels; i++) {
622 if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
623 info->levels[i].split)
624 return true;
625 }
626
627 return false;
628 }
629
630 static bool
631 array_path_is_out_of_bounds(nir_deref_path *path,
632 struct array_var_info *info)
633 {
634 if (info == NULL)
635 return false;
636
637 assert(path->path[0]->var == info->base_var);
638 for (unsigned i = 0; i < info->num_levels; i++) {
639 nir_deref_instr *p = path->path[i + 1];
640 if (p->deref_type == nir_deref_type_array_wildcard)
641 continue;
642
643 if (nir_src_is_const(p->arr.index) &&
644 nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
645 return true;
646 }
647
648 return false;
649 }
650
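/* Recursively lowers a copy_deref whose source and/or destination contain
 * array wildcards over split levels.  A wildcard over a level that is split
 * on either side is unrolled into one copy per element; a wildcard where
 * neither side is split is kept as-is.  For instance, copying a[*] <- b[*]
 * where "a" is split at that level becomes a[0] <- b[0], ..., a[n-1] <- b[n-1].
 */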
651 static void
652 emit_split_copies(nir_builder *b,
653 struct array_var_info *dst_info, nir_deref_path *dst_path,
654 unsigned dst_level, nir_deref_instr *dst,
655 struct array_var_info *src_info, nir_deref_path *src_path,
656 unsigned src_level, nir_deref_instr *src)
657 {
658 nir_deref_instr *dst_p, *src_p;
659
660 while ((dst_p = dst_path->path[dst_level + 1])) {
661 if (dst_p->deref_type == nir_deref_type_array_wildcard)
662 break;
663
664 dst = nir_build_deref_follower(b, dst, dst_p);
665 dst_level++;
666 }
667
668 while ((src_p = src_path->path[src_level + 1])) {
669 if (src_p->deref_type == nir_deref_type_array_wildcard)
670 break;
671
672 src = nir_build_deref_follower(b, src, src_p);
673 src_level++;
674 }
675
676 if (src_p == NULL || dst_p == NULL) {
677 assert(src_p == NULL && dst_p == NULL);
678 nir_copy_deref(b, dst, src);
679 } else {
680 assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
681 src_p->deref_type == nir_deref_type_array_wildcard);
682
683 if ((dst_info && dst_info->levels[dst_level].split) ||
684 (src_info && src_info->levels[src_level].split)) {
685 /* At least one of the source and destination has no indirects at this
686 * level, so we lower the wildcard to one copy per element.
687 */
688 assert(glsl_get_length(dst_path->path[dst_level]->type) ==
689 glsl_get_length(src_path->path[src_level]->type));
690 unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
691 for (unsigned i = 0; i < len; i++) {
692 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
693 nir_build_deref_array_imm(b, dst, i),
694 src_info, src_path, src_level + 1,
695 nir_build_deref_array_imm(b, src, i));
696 }
697 } else {
698 /* Neither side is being split so we just keep going */
699 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
700 nir_build_deref_array_wildcard(b, dst),
701 src_info, src_path, src_level + 1,
702 nir_build_deref_array_wildcard(b, src));
703 }
704 }
705 }
706
707 static void
708 split_array_copies_impl(nir_function_impl *impl,
709 struct hash_table *var_info_map,
710 nir_variable_mode modes,
711 void *mem_ctx)
712 {
713 nir_builder b;
714 nir_builder_init(&b, impl);
715
716 nir_foreach_block(block, impl) {
717 nir_foreach_instr_safe(instr, block) {
718 if (instr->type != nir_instr_type_intrinsic)
719 continue;
720
721 nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
722 if (copy->intrinsic != nir_intrinsic_copy_deref)
723 continue;
724
725 nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
726 nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
727
728 struct array_var_info *dst_info =
729 get_array_deref_info(dst_deref, var_info_map, modes);
730 struct array_var_info *src_info =
731 get_array_deref_info(src_deref, var_info_map, modes);
732
733 if (!src_info && !dst_info)
734 continue;
735
736 nir_deref_path dst_path, src_path;
737 nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
738 nir_deref_path_init(&src_path, src_deref, mem_ctx);
739
740 if (!deref_has_split_wildcard(&dst_path, dst_info) &&
741 !deref_has_split_wildcard(&src_path, src_info))
742 continue;
743
744 b.cursor = nir_instr_remove(&copy->instr);
745
746 emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
747 src_info, &src_path, 0, src_path.path[0]);
748 }
749 }
750 }
751
752 static void
753 split_array_access_impl(nir_function_impl *impl,
754 struct hash_table *var_info_map,
755 nir_variable_mode modes,
756 void *mem_ctx)
757 {
758 nir_builder b;
759 nir_builder_init(&b, impl);
760
761 nir_foreach_block(block, impl) {
762 nir_foreach_instr_safe(instr, block) {
763 if (instr->type == nir_instr_type_deref) {
764 /* Clean up any dead derefs we find lying around. They may refer
765 * to variables we're planning to split.
766 */
767 nir_deref_instr *deref = nir_instr_as_deref(instr);
768 if (deref->mode & modes)
769 nir_deref_instr_remove_if_unused(deref);
770 continue;
771 }
772
773 if (instr->type != nir_instr_type_intrinsic)
774 continue;
775
776 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
777 if (intrin->intrinsic != nir_intrinsic_load_deref &&
778 intrin->intrinsic != nir_intrinsic_store_deref &&
779 intrin->intrinsic != nir_intrinsic_copy_deref)
780 continue;
781
782 const unsigned num_derefs =
783 intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
784
785 for (unsigned d = 0; d < num_derefs; d++) {
786 nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
787
788 struct array_var_info *info =
789 get_array_deref_info(deref, var_info_map, modes);
790 if (!info)
791 continue;
792
793 nir_deref_path path;
794 nir_deref_path_init(&path, deref, mem_ctx);
795
796 b.cursor = nir_before_instr(&intrin->instr);
797
798 if (array_path_is_out_of_bounds(&path, info)) {
799 /* If one of the derefs is out-of-bounds, we just delete the
800 * instruction. If a destination is out of bounds, then it may
801 * have been in-bounds prior to shrinking so we don't want to
802 * accidentally stomp something. However, we've already proven
803 * that it will never be read so it's safe to delete. If a
804 * source is out of bounds then it is loading random garbage.
805 * For loads, we replace their uses with an undef instruction
806 * and for copies we just delete the copy since it was writing
807 * undefined garbage anyway and we may as well leave the random
808 * garbage in the destination alone.
809 */
810 if (intrin->intrinsic == nir_intrinsic_load_deref) {
811 nir_ssa_def *u =
812 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
813 intrin->dest.ssa.bit_size);
814 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
815 nir_src_for_ssa(u));
816 }
817 nir_instr_remove(&intrin->instr);
818 for (unsigned i = 0; i < num_derefs; i++)
819 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
820 break;
821 }
822
823 struct array_split *split = &info->root_split;
824 for (unsigned i = 0; i < info->num_levels; i++) {
825 if (info->levels[i].split) {
826 nir_deref_instr *p = path.path[i + 1];
827 unsigned index = nir_src_as_uint(p->arr.index);
828 assert(index < info->levels[i].array_len);
829 split = &split->splits[index];
830 }
831 }
832 assert(!split->splits && split->var);
833
834 nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
835 for (unsigned i = 0; i < info->num_levels; i++) {
836 if (!info->levels[i].split) {
837 new_deref = nir_build_deref_follower(&b, new_deref,
838 path.path[i + 1]);
839 }
840 }
841 assert(new_deref->type == deref->type);
842
843 /* Rewrite the deref source to point to the split one */
844 nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
845 nir_src_for_ssa(&new_deref->dest.ssa));
846 nir_deref_instr_remove_if_unused(deref);
847 }
848 }
849 }
850 }
851
852 /** A pass for splitting arrays of vectors into multiple variables
853 *
854 * This pass looks at arrays (possibly multiple levels) of vectors (not
855 * structures or other types) and tries to split them into piles of variables,
856 * one for each array element. The heuristic used is simple: If a given array
857 * level is never used with an indirect, that array level will get split.
858 *
859 * This pass could probably handle structures easily enough, but making a pass
860 * that could see through an array of structures of arrays would be difficult
861 * so it's best to just run nir_split_struct_vars first.
862 */
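/* As an illustration: if
 *
 *    vec4 v[3][2];
 *
 * only ever sees non-constant indices on its inner level, the outer level is
 * split and the shader ends up with three variables named "(v[0][*])",
 * "(v[1][*])" and "(v[2][*])", each of type vec4[2].  Any level that is used
 * with an indirect index is left alone.
 */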
863 bool
864 nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
865 {
866 void *mem_ctx = ralloc_context(NULL);
867 struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
868 struct set *complex_vars = NULL;
869
870 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
871
872 bool has_global_array = false;
873 if (modes & nir_var_shader_temp) {
874 has_global_array = init_var_list_array_infos(shader,
875 &shader->globals,
876 nir_var_shader_temp,
877 var_info_map,
878 &complex_vars,
879 mem_ctx);
880 }
881
882 bool has_any_array = false;
883 nir_foreach_function(function, shader) {
884 if (!function->impl)
885 continue;
886
887 bool has_local_array = false;
888 if (modes & nir_var_function_temp) {
889 has_local_array = init_var_list_array_infos(shader,
890 &function->impl->locals,
891 nir_var_function_temp,
892 var_info_map,
893 &complex_vars,
894 mem_ctx);
895 }
896
897 if (has_global_array || has_local_array) {
898 has_any_array = true;
899 mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
900 }
901 }
902
903 /* If we failed to find any arrays of vectors, bail early. */
904 if (!has_any_array) {
905 ralloc_free(mem_ctx);
906 nir_shader_preserve_all_metadata(shader);
907 return false;
908 }
909
910 bool has_global_splits = false;
911 if (modes & nir_var_shader_temp) {
912 has_global_splits = split_var_list_arrays(shader, NULL,
913 &shader->globals,
914 nir_var_shader_temp,
915 var_info_map, mem_ctx);
916 }
917
918 bool progress = false;
919 nir_foreach_function(function, shader) {
920 if (!function->impl)
921 continue;
922
923 bool has_local_splits = false;
924 if (modes & nir_var_function_temp) {
925 has_local_splits = split_var_list_arrays(shader, function->impl,
926 &function->impl->locals,
927 nir_var_function_temp,
928 var_info_map, mem_ctx);
929 }
930
931 if (has_global_splits || has_local_splits) {
932 split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
933 split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
934
935 nir_metadata_preserve(function->impl, nir_metadata_block_index |
936 nir_metadata_dominance);
937 progress = true;
938 } else {
939 nir_metadata_preserve(function->impl, nir_metadata_all);
940 }
941 }
942
943 ralloc_free(mem_ctx);
944
945 return progress;
946 }
947
948 struct array_level_usage {
949 unsigned array_len;
950
951 /* The value UINT_MAX will be used to indicate an indirect */
952 unsigned max_read;
953 unsigned max_written;
954
955 /* True if there is a copy that isn't to/from a shrinkable array */
956 bool has_external_copy;
957 struct set *levels_copied;
958 };
959
960 struct vec_var_usage {
961 /* Convenience set of all components this variable has */
962 nir_component_mask_t all_comps;
963
964 nir_component_mask_t comps_read;
965 nir_component_mask_t comps_written;
966
967 nir_component_mask_t comps_kept;
968
969 /* True if there is a copy that isn't to/from a shrinkable vector */
970 bool has_external_copy;
971 bool has_complex_use;
972 struct set *vars_copied;
973
974 unsigned num_levels;
975 struct array_level_usage levels[0];
976 };
977
978 static struct vec_var_usage *
979 get_vec_var_usage(nir_variable *var,
980 struct hash_table *var_usage_map,
981 bool add_usage_entry, void *mem_ctx)
982 {
983 struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
984 if (entry)
985 return entry->data;
986
987 if (!add_usage_entry)
988 return NULL;
989
990 /* Check to make sure that we are working with an array of vectors. We
991 * don't bother to shrink single vectors because we figure that we can
992 * clean it up better with SSA than by inserting piles of vecN instructions
993 * to compact results.
994 */
995 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
996 if (num_levels < 1)
997 return NULL; /* Not an array of vectors */
998
999 struct vec_var_usage *usage =
1000 rzalloc_size(mem_ctx, sizeof(*usage) +
1001 num_levels * sizeof(usage->levels[0]));
1002
1003 usage->num_levels = num_levels;
1004 const struct glsl_type *type = var->type;
1005 for (unsigned i = 0; i < num_levels; i++) {
1006 usage->levels[i].array_len = glsl_get_length(type);
1007 type = glsl_get_array_element(type);
1008 }
1009 assert(glsl_type_is_vector_or_scalar(type));
1010
1011 usage->all_comps = (1 << glsl_get_components(type)) - 1;
1012
1013 _mesa_hash_table_insert(var_usage_map, var, usage);
1014
1015 return usage;
1016 }
1017
1018 static struct vec_var_usage *
1019 get_vec_deref_usage(nir_deref_instr *deref,
1020 struct hash_table *var_usage_map,
1021 nir_variable_mode modes,
1022 bool add_usage_entry, void *mem_ctx)
1023 {
1024 if (!(deref->mode & modes))
1025 return NULL;
1026
1027 return get_vec_var_usage(nir_deref_instr_get_variable(deref),
1028 var_usage_map, add_usage_entry, mem_ctx);
1029 }
1030
1031 static void
1032 mark_deref_if_complex(nir_deref_instr *deref,
1033 struct hash_table *var_usage_map,
1034 nir_variable_mode modes,
1035 void *mem_ctx)
1036 {
1037 if (!(deref->mode & modes))
1038 return;
1039
1040 /* Only bother with var derefs because nir_deref_instr_has_complex_use is
1041 * recursive.
1042 */
1043 if (deref->deref_type != nir_deref_type_var)
1044 return;
1045
1046 if (!nir_deref_instr_has_complex_use(deref))
1047 return;
1048
1049 struct vec_var_usage *usage =
1050 get_vec_var_usage(deref->var, var_usage_map, true, mem_ctx);
1051 if (!usage)
1052 return;
1053
1054 usage->has_complex_use = true;
1055 }
1056
1057 static void
1058 mark_deref_used(nir_deref_instr *deref,
1059 nir_component_mask_t comps_read,
1060 nir_component_mask_t comps_written,
1061 nir_deref_instr *copy_deref,
1062 struct hash_table *var_usage_map,
1063 nir_variable_mode modes,
1064 void *mem_ctx)
1065 {
1066 if (!(deref->mode & modes))
1067 return;
1068
1069 nir_variable *var = nir_deref_instr_get_variable(deref);
1070 if (var == NULL)
1071 return;
1072
1073 struct vec_var_usage *usage =
1074 get_vec_var_usage(var, var_usage_map, true, mem_ctx);
1075 if (!usage)
1076 return;
1077
1078 usage->comps_read |= comps_read & usage->all_comps;
1079 usage->comps_written |= comps_written & usage->all_comps;
1080
1081 struct vec_var_usage *copy_usage = NULL;
1082 if (copy_deref) {
1083 copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
1084 true, mem_ctx);
1085 if (copy_usage) {
1086 if (usage->vars_copied == NULL) {
1087 usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
1088 }
1089 _mesa_set_add(usage->vars_copied, copy_usage);
1090 } else {
1091 usage->has_external_copy = true;
1092 }
1093 }
1094
1095 nir_deref_path path;
1096 nir_deref_path_init(&path, deref, mem_ctx);
1097
1098 nir_deref_path copy_path;
1099 if (copy_usage)
1100 nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
1101
1102 unsigned copy_i = 0;
1103 for (unsigned i = 0; i < usage->num_levels; i++) {
1104 struct array_level_usage *level = &usage->levels[i];
1105 nir_deref_instr *deref = path.path[i + 1];
1106 assert(deref->deref_type == nir_deref_type_array ||
1107 deref->deref_type == nir_deref_type_array_wildcard);
1108
1109 unsigned max_used;
1110 if (deref->deref_type == nir_deref_type_array) {
1111 max_used = nir_src_is_const(deref->arr.index) ?
1112 nir_src_as_uint(deref->arr.index) : UINT_MAX;
1113 } else {
1114 /* For wildcards, we read or wrote the whole thing. */
1115 assert(deref->deref_type == nir_deref_type_array_wildcard);
1116 max_used = level->array_len - 1;
1117
1118 if (copy_usage) {
1119 /* Match each wildcard level with the level on copy_usage */
1120 for (; copy_path.path[copy_i + 1]; copy_i++) {
1121 if (copy_path.path[copy_i + 1]->deref_type ==
1122 nir_deref_type_array_wildcard)
1123 break;
1124 }
1125 struct array_level_usage *copy_level =
1126 &copy_usage->levels[copy_i++];
1127
1128 if (level->levels_copied == NULL) {
1129 level->levels_copied = _mesa_pointer_set_create(mem_ctx);
1130 }
1131 _mesa_set_add(level->levels_copied, copy_level);
1132 } else {
1133 /* We have a wildcard and it comes from a variable we aren't
1134 * tracking; flag it so we know not to shorten this array.
1135 */
1136 level->has_external_copy = true;
1137 }
1138 }
1139
1140 if (comps_written)
1141 level->max_written = MAX2(level->max_written, max_used);
1142 if (comps_read)
1143 level->max_read = MAX2(level->max_read, max_used);
1144 }
1145 }
1146
1147 static bool
1148 src_is_load_deref(nir_src src, nir_src deref_src)
1149 {
1150 nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
1151 if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
1152 return false;
1153
1154 assert(load->src[0].is_ssa);
1155
1156 return load->src[0].ssa == deref_src.ssa;
1157 }
1158
1159 /* Returns all non-self-referential components of a store instruction. A
1160 * component is self-referential if it comes from the same component of a load
1161 * instruction on the same deref. If the only data in a particular component
1162 * of a variable came directly from that component then it's undefined. The
1163 * only way to get defined data into a component of a variable is for it to
1164 * get written there by something outside or from a different component.
1165 *
1166 * This is a fairly common pattern in shaders that come from either GLSL IR or
1167 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1168 * load-vec-store.
1169 */
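/* As an illustration, GLSL "v.y = f;" typically reaches NIR (via glsl_to_nir)
 * as something like
 *
 *    vec4 t = load_deref(v);
 *    store_deref(v, vec4(t.x, f, t.z, t.w), writemask = xyzw);
 *
 * Components x, z and w only write back what was just loaded from "v", so the
 * only component this function reports as written is y.
 */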
1170 static nir_component_mask_t
1171 get_non_self_referential_store_comps(nir_intrinsic_instr *store)
1172 {
1173 nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1174
1175 assert(store->src[1].is_ssa);
1176 nir_instr *src_instr = store->src[1].ssa->parent_instr;
1177 if (src_instr->type != nir_instr_type_alu)
1178 return comps;
1179
1180 nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1181
1182 if (src_alu->op == nir_op_mov) {
1183 /* If it's just a swizzle of a load from the same deref, discount any
1184 * channels that don't move in the swizzle.
1185 */
1186 if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1187 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1188 if (src_alu->src[0].swizzle[i] == i)
1189 comps &= ~(1u << i);
1190 }
1191 }
1192 } else if (nir_op_is_vec(src_alu->op)) {
1193 /* If it's a vec, discount any channels that are just loads from the
1194 * same deref put in the same spot.
1195 */
1196 for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1197 if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1198 src_alu->src[i].swizzle[0] == i)
1199 comps &= ~(1u << i);
1200 }
1201 }
1202
1203 return comps;
1204 }
1205
1206 static void
1207 find_used_components_impl(nir_function_impl *impl,
1208 struct hash_table *var_usage_map,
1209 nir_variable_mode modes,
1210 void *mem_ctx)
1211 {
1212 nir_foreach_block(block, impl) {
1213 nir_foreach_instr(instr, block) {
1214 if (instr->type == nir_instr_type_deref) {
1215 mark_deref_if_complex(nir_instr_as_deref(instr),
1216 var_usage_map, modes, mem_ctx);
1217 }
1218
1219 if (instr->type != nir_instr_type_intrinsic)
1220 continue;
1221
1222 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1223 switch (intrin->intrinsic) {
1224 case nir_intrinsic_load_deref:
1225 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1226 nir_ssa_def_components_read(&intrin->dest.ssa), 0,
1227 NULL, var_usage_map, modes, mem_ctx);
1228 break;
1229
1230 case nir_intrinsic_store_deref:
1231 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1232 0, get_non_self_referential_store_comps(intrin),
1233 NULL, var_usage_map, modes, mem_ctx);
1234 break;
1235
1236 case nir_intrinsic_copy_deref: {
1237 /* Just mark everything used for copies. */
1238 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1239 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1240 mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1241 mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1242 break;
1243 }
1244
1245 default:
1246 break;
1247 }
1248 }
1249 }
1250 }
1251
1252 static bool
1253 shrink_vec_var_list(struct exec_list *vars,
1254 nir_variable_mode mode,
1255 struct hash_table *var_usage_map)
1256 {
1257 /* Initialize the components kept field of each variable. This is the
1258 * AND of the components written and components read. If a component is
1259 * written but never read, it's dead. If it is read but never written,
1260 * then all values read are undefined garbage and we may as well not read
1261 * them.
1262 *
1263 * The same logic applies to the array length. We make the array length
1264 * the minimum length required between reads and writes and plan to
1265 * discard any OOB access. The one exception here is indirect writes
1266 * because we don't know where they will land and we can't shrink an array
1267 * with indirect writes because previously in-bounds writes may become
1268 * out-of-bounds and have undefined behavior.
1269 *
1270 * Also, if we have a copy to/from something we can't shrink, we need
1271 * to leave components and array_len of any wildcards alone.
1272 */
1273 nir_foreach_variable(var, vars) {
1274 if (var->data.mode != mode)
1275 continue;
1276
1277 struct vec_var_usage *usage =
1278 get_vec_var_usage(var, var_usage_map, false, NULL);
1279 if (!usage)
1280 continue;
1281
1282 assert(usage->comps_kept == 0);
1283 if (usage->has_external_copy || usage->has_complex_use)
1284 usage->comps_kept = usage->all_comps;
1285 else
1286 usage->comps_kept = usage->comps_read & usage->comps_written;
1287
1288 for (unsigned i = 0; i < usage->num_levels; i++) {
1289 struct array_level_usage *level = &usage->levels[i];
1290 assert(level->array_len > 0);
1291
1292 if (level->max_written == UINT_MAX || level->has_external_copy ||
1293 usage->has_complex_use)
1294 continue; /* Can't shrink */
1295
1296 unsigned max_used = MIN2(level->max_read, level->max_written);
1297 level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1298 }
1299 }
1300
1301 /* In order for variable copies to work, we have to have the same data type
1302 * on the source and the destination. In order to satisfy this, we run a
1303 * little fixed-point algorithm to transitively ensure that we get enough
1304 * components and array elements for this to hold for all copies.
1305 */
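/* As an illustration: if variable A is copied to variable B, A keeps
 * components .xy and B keeps .yz, then both end up keeping .xyz so the copy
 * stays type-correct; copied array levels likewise take the maximum of the
 * two lengths.
 */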
1306 bool fp_progress;
1307 do {
1308 fp_progress = false;
1309 nir_foreach_variable(var, vars) {
1310 if (var->data.mode != mode)
1311 continue;
1312
1313 struct vec_var_usage *var_usage =
1314 get_vec_var_usage(var, var_usage_map, false, NULL);
1315 if (!var_usage || !var_usage->vars_copied)
1316 continue;
1317
1318 set_foreach(var_usage->vars_copied, copy_entry) {
1319 struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1320 if (copy_usage->comps_kept != var_usage->comps_kept) {
1321 nir_component_mask_t comps_kept =
1322 (var_usage->comps_kept | copy_usage->comps_kept);
1323 var_usage->comps_kept = comps_kept;
1324 copy_usage->comps_kept = comps_kept;
1325 fp_progress = true;
1326 }
1327 }
1328
1329 for (unsigned i = 0; i < var_usage->num_levels; i++) {
1330 struct array_level_usage *var_level = &var_usage->levels[i];
1331 if (!var_level->levels_copied)
1332 continue;
1333
1334 set_foreach(var_level->levels_copied, copy_entry) {
1335 struct array_level_usage *copy_level = (void *)copy_entry->key;
1336 if (var_level->array_len != copy_level->array_len) {
1337 unsigned array_len =
1338 MAX2(var_level->array_len, copy_level->array_len);
1339 var_level->array_len = array_len;
1340 copy_level->array_len = array_len;
1341 fp_progress = true;
1342 }
1343 }
1344 }
1345 }
1346 } while (fp_progress);
1347
1348 bool vars_shrunk = false;
1349 nir_foreach_variable_safe(var, vars) {
1350 if (var->data.mode != mode)
1351 continue;
1352
1353 struct vec_var_usage *usage =
1354 get_vec_var_usage(var, var_usage_map, false, NULL);
1355 if (!usage)
1356 continue;
1357
1358 bool shrunk = false;
1359 const struct glsl_type *vec_type = var->type;
1360 for (unsigned i = 0; i < usage->num_levels; i++) {
1361 /* If we've reduced the array to zero elements at some level, just
1362 * set comps_kept to 0 and delete the variable.
1363 */
1364 if (usage->levels[i].array_len == 0) {
1365 usage->comps_kept = 0;
1366 break;
1367 }
1368
1369 assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1370 if (usage->levels[i].array_len < glsl_get_length(vec_type))
1371 shrunk = true;
1372 vec_type = glsl_get_array_element(vec_type);
1373 }
1374 assert(glsl_type_is_vector_or_scalar(vec_type));
1375
1376 assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1377 if (usage->comps_kept != usage->all_comps)
1378 shrunk = true;
1379
1380 if (usage->comps_kept == 0) {
1381 /* This variable is dead, remove it */
1382 vars_shrunk = true;
1383 exec_node_remove(&var->node);
1384 continue;
1385 }
1386
1387 if (!shrunk) {
1388 /* This variable doesn't need to be shrunk. Remove it from the
1389 * hash table so later steps will ignore it.
1390 */
1391 _mesa_hash_table_remove_key(var_usage_map, var);
1392 continue;
1393 }
1394
1395 /* Build the new var type */
1396 unsigned new_num_comps = util_bitcount(usage->comps_kept);
1397 const struct glsl_type *new_type =
1398 glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1399 for (int i = usage->num_levels - 1; i >= 0; i--) {
1400 assert(usage->levels[i].array_len > 0);
1401 /* If the original type was a matrix type, we'd like to keep that so
1402 * we don't convert matrices into arrays.
1403 */
1404 if (i == usage->num_levels - 1 &&
1405 glsl_type_is_matrix(glsl_without_array(var->type)) &&
1406 new_num_comps > 1 && usage->levels[i].array_len > 1) {
1407 new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1408 new_num_comps,
1409 usage->levels[i].array_len);
1410 } else {
1411 new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
1412 }
1413 }
1414 var->type = new_type;
1415
1416 vars_shrunk = true;
1417 }
1418
1419 return vars_shrunk;
1420 }
1421
1422 static bool
1423 vec_deref_is_oob(nir_deref_instr *deref,
1424 struct vec_var_usage *usage)
1425 {
1426 nir_deref_path path;
1427 nir_deref_path_init(&path, deref, NULL);
1428
1429 bool oob = false;
1430 for (unsigned i = 0; i < usage->num_levels; i++) {
1431 nir_deref_instr *p = path.path[i + 1];
1432 if (p->deref_type == nir_deref_type_array_wildcard)
1433 continue;
1434
1435 if (nir_src_is_const(p->arr.index) &&
1436 nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1437 oob = true;
1438 break;
1439 }
1440 }
1441
1442 nir_deref_path_finish(&path);
1443
1444 return oob;
1445 }
1446
1447 static bool
1448 vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1449 struct hash_table *var_usage_map,
1450 nir_variable_mode modes)
1451 {
1452 struct vec_var_usage *usage =
1453 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1454 if (!usage)
1455 return false;
1456
1457 return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1458 }
1459
1460 static void
1461 shrink_vec_var_access_impl(nir_function_impl *impl,
1462 struct hash_table *var_usage_map,
1463 nir_variable_mode modes)
1464 {
1465 nir_builder b;
1466 nir_builder_init(&b, impl);
1467
1468 nir_foreach_block(block, impl) {
1469 nir_foreach_instr_safe(instr, block) {
1470 switch (instr->type) {
1471 case nir_instr_type_deref: {
1472 nir_deref_instr *deref = nir_instr_as_deref(instr);
1473 if (!(deref->mode & modes))
1474 break;
1475
1476 /* Clean up any dead derefs we find lying around. They may refer
1477 * to variables we've deleted.
1478 */
1479 if (nir_deref_instr_remove_if_unused(deref))
1480 break;
1481
1482 /* Update the type in the deref to keep the types consistent as
1483 * you walk down the chain. We don't need to check if this is one
1484 * of the derefs we're shrinking because this is a no-op if it
1485 * isn't. The worst that could happen is that we accidentally fix
1486 * an invalid deref.
1487 */
1488 if (deref->deref_type == nir_deref_type_var) {
1489 deref->type = deref->var->type;
1490 } else if (deref->deref_type == nir_deref_type_array ||
1491 deref->deref_type == nir_deref_type_array_wildcard) {
1492 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1493 assert(glsl_type_is_array(parent->type) ||
1494 glsl_type_is_matrix(parent->type));
1495 deref->type = glsl_get_array_element(parent->type);
1496 }
1497 break;
1498 }
1499
1500 case nir_instr_type_intrinsic: {
1501 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1502
1503 /* If we have a copy whose source or destination has been deleted
1504 * because we determined the variable was dead, then we just
1505 * delete the copy instruction. If the source variable was dead
1506 * then it was writing undefined garbage anyway and if it's the
1507 * destination variable that's dead then the write isn't needed.
1508 */
1509 if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1510 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1511 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1512 if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1513 vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1514 nir_instr_remove(&intrin->instr);
1515 nir_deref_instr_remove_if_unused(dst);
1516 nir_deref_instr_remove_if_unused(src);
1517 }
1518 continue;
1519 }
1520
1521 if (intrin->intrinsic != nir_intrinsic_load_deref &&
1522 intrin->intrinsic != nir_intrinsic_store_deref)
1523 continue;
1524
1525 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1526 if (!(deref->mode & modes))
1527 continue;
1528
1529 struct vec_var_usage *usage =
1530 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1531 if (!usage)
1532 continue;
1533
1534 if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1535 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1536 nir_ssa_def *u =
1537 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
1538 intrin->dest.ssa.bit_size);
1539 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
1540 nir_src_for_ssa(u));
1541 }
1542 nir_instr_remove(&intrin->instr);
1543 nir_deref_instr_remove_if_unused(deref);
1544 continue;
1545 }
1546
1547 /* If we're not dropping any components, there's no need to
1548 * compact vectors.
1549 */
1550 if (usage->comps_kept == usage->all_comps)
1551 continue;
1552
1553 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1554 b.cursor = nir_after_instr(&intrin->instr);
1555
1556 nir_ssa_def *undef =
1557 nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
1558 nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1559 unsigned c = 0;
1560 for (unsigned i = 0; i < intrin->num_components; i++) {
1561 if (usage->comps_kept & (1u << i))
1562 vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
1563 else
1564 vec_srcs[i] = undef;
1565 }
1566 nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1567
1568 nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
1569 nir_src_for_ssa(vec),
1570 vec->parent_instr);
1571
1572 /* The SSA def is now only used by the swizzle. It's safe to
1573 * shrink the number of components.
1574 */
1575 assert(list_length(&intrin->dest.ssa.uses) == c);
1576 intrin->num_components = c;
1577 intrin->dest.ssa.num_components = c;
1578 } else {
1579 nir_component_mask_t write_mask =
1580 nir_intrinsic_write_mask(intrin);
1581
1582 unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1583 nir_component_mask_t new_write_mask = 0;
1584 unsigned c = 0;
1585 for (unsigned i = 0; i < intrin->num_components; i++) {
1586 if (usage->comps_kept & (1u << i)) {
1587 swizzle[c] = i;
1588 if (write_mask & (1u << i))
1589 new_write_mask |= 1u << c;
1590 c++;
1591 }
1592 }
1593
1594 b.cursor = nir_before_instr(&intrin->instr);
1595
1596 nir_ssa_def *swizzled =
1597 nir_swizzle(&b, intrin->src[1].ssa, swizzle, c);
1598
1599 /* Rewrite to use the compacted source */
1600 nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
1601 nir_src_for_ssa(swizzled));
1602 nir_intrinsic_set_write_mask(intrin, new_write_mask);
1603 intrin->num_components = c;
1604 }
1605 break;
1606 }
1607
1608 default:
1609 break;
1610 }
1611 }
1612 }
1613 }
1614
1615 static bool
1616 function_impl_has_vars_with_modes(nir_function_impl *impl,
1617 nir_variable_mode modes)
1618 {
1619 nir_shader *shader = impl->function->shader;
1620
1621 if ((modes & nir_var_shader_temp) && !exec_list_is_empty(&shader->globals))
1622 return true;
1623
1624 if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
1625 return true;
1626
1627 return false;
1628 }
1629
1630 /** Attempt to shrink arrays of vectors
1631 *
1632 * This pass looks at variables which contain a vector or an array (possibly
1633 * multiple dimensions) of vectors and attempts to lower them to a smaller vector
1634 * or array. If the pass can prove that a component of a vector (or array of
1635 * vectors) is never really used, then that component will be removed.
1636 * Similarly, the pass attempts to shorten arrays based on what elements it
1637 * can prove are never read or never contain valid data.
1638 */
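/* An illustrative sketch: given
 *
 *    vec4 tmp[8];
 *
 * where only components .xyz are ever both written and read, no element past
 * tmp[3] is needed, and there are no indirect writes or copies to variables
 * the pass can't track, the variable is rewritten as
 *
 *    vec3 tmp[4];
 *
 * with loads and stores re-swizzled to the compacted components and any
 * now out-of-bounds accesses removed (out-of-bounds loads become undefs).
 */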
1639 bool
1640 nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1641 {
1642 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
1643
1644 void *mem_ctx = ralloc_context(NULL);
1645
1646 struct hash_table *var_usage_map =
1647 _mesa_pointer_hash_table_create(mem_ctx);
1648
1649 bool has_vars_to_shrink = false;
1650 nir_foreach_function(function, shader) {
1651 if (!function->impl)
1652 continue;
1653
1654 /* Don't even bother crawling the IR if we don't have any variables.
1655 * Given that this pass deletes any unused variables, it's likely that
1656 * we will be in this scenario eventually.
1657 */
1658 if (function_impl_has_vars_with_modes(function->impl, modes)) {
1659 has_vars_to_shrink = true;
1660 find_used_components_impl(function->impl, var_usage_map,
1661 modes, mem_ctx);
1662 }
1663 }
1664 if (!has_vars_to_shrink) {
1665 ralloc_free(mem_ctx);
1666 nir_shader_preserve_all_metadata(shader);
1667 return false;
1668 }
1669
1670 bool globals_shrunk = false;
1671 if (modes & nir_var_shader_temp) {
1672 globals_shrunk = shrink_vec_var_list(&shader->globals,
1673 nir_var_shader_temp,
1674 var_usage_map);
1675 }
1676
1677 bool progress = false;
1678 nir_foreach_function(function, shader) {
1679 if (!function->impl)
1680 continue;
1681
1682 bool locals_shrunk = false;
1683 if (modes & nir_var_function_temp) {
1684 locals_shrunk = shrink_vec_var_list(&function->impl->locals,
1685 nir_var_function_temp,
1686 var_usage_map);
1687 }
1688
1689 if (globals_shrunk || locals_shrunk) {
1690 shrink_vec_var_access_impl(function->impl, var_usage_map, modes);
1691
1692 nir_metadata_preserve(function->impl, nir_metadata_block_index |
1693 nir_metadata_dominance);
1694 progress = true;
1695 } else {
1696 nir_metadata_preserve(function->impl, nir_metadata_all);
1697 }
1698 }
1699
1700 ralloc_free(mem_ctx);
1701
1702 return progress;
1703 }