[mesa.git] src/compiler/nir/nir_split_vars.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "nir_vla.h"
28
29 #include "util/u_math.h"
30
31
32 struct split_var_state {
33 void *mem_ctx;
34
35 nir_shader *shader;
36 nir_function_impl *impl;
37
38 nir_variable *base_var;
39 };
40
41 struct field {
42 struct field *parent;
43
44 const struct glsl_type *type;
45
46 unsigned num_fields;
47 struct field *fields;
48
49 nir_variable *var;
50 };
51
52 static const struct glsl_type *
53 wrap_type_in_array(const struct glsl_type *type,
54 const struct glsl_type *array_type)
55 {
56 if (!glsl_type_is_array(array_type))
57 return type;
58
59 const struct glsl_type *elem_type =
60 wrap_type_in_array(type, glsl_get_array_element(array_type));
61 assert(glsl_get_explicit_stride(array_type) == 0);
62 return glsl_array_type(elem_type, glsl_get_length(array_type), 0);
63 }
64
65 static int
66 num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
67 {
68 int num_levels = 0;
69 while (true) {
70 if (glsl_type_is_array_or_matrix(type)) {
71 num_levels++;
72 type = glsl_get_array_element(type);
73 } else if (glsl_type_is_vector_or_scalar(type)) {
74 return num_levels;
75 } else {
76 /* Not an array of vectors */
77 return -1;
78 }
79 }
80 }
81
82 static void
83 init_field_for_type(struct field *field, struct field *parent,
84 const struct glsl_type *type,
85 const char *name,
86 struct split_var_state *state)
87 {
88 *field = (struct field) {
89 .parent = parent,
90 .type = type,
91 };
92
93 const struct glsl_type *struct_type = glsl_without_array(type);
94 if (glsl_type_is_struct(struct_type)) {
95 field->num_fields = glsl_get_length(struct_type);
96 field->fields = ralloc_array(state->mem_ctx, struct field,
97 field->num_fields);
98 for (unsigned i = 0; i < field->num_fields; i++) {
99 char *field_name = NULL;
100 if (name) {
101 field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
102 glsl_get_struct_elem_name(struct_type, i));
103 } else {
104 field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
105 glsl_get_type_name(struct_type),
106 glsl_get_struct_elem_name(struct_type, i));
107 }
108 init_field_for_type(&field->fields[i], field,
109 glsl_get_struct_field(struct_type, i),
110 field_name, state);
111 }
112 } else {
113 const struct glsl_type *var_type = type;
114 for (struct field *f = field->parent; f; f = f->parent)
115 var_type = wrap_type_in_array(var_type, f->type);
116
117 nir_variable_mode mode = state->base_var->data.mode;
118 if (mode == nir_var_function) {
119 field->var = nir_local_variable_create(state->impl, var_type, name);
120 } else {
121 field->var = nir_variable_create(state->shader, mode, var_type, name);
122 }
123 }
124 }
125
126 static bool
127 split_var_list_structs(nir_shader *shader,
128 nir_function_impl *impl,
129 struct exec_list *vars,
130 struct hash_table *var_field_map,
131 void *mem_ctx)
132 {
133 struct split_var_state state = {
134 .mem_ctx = mem_ctx,
135 .shader = shader,
136 .impl = impl,
137 };
138
139 struct exec_list split_vars;
140 exec_list_make_empty(&split_vars);
141
142 /* To avoid list confusion (we'll be adding things as we split variables),
143 * pull all of the variables we plan to split off of the list
144 */
145 nir_foreach_variable_safe(var, vars) {
146 if (!glsl_type_is_struct(glsl_without_array(var->type)))
147 continue;
148
149 exec_node_remove(&var->node);
150 exec_list_push_tail(&split_vars, &var->node);
151 }
152
153 nir_foreach_variable(var, &split_vars) {
154 state.base_var = var;
155
156 struct field *root_field = ralloc(mem_ctx, struct field);
157 init_field_for_type(root_field, NULL, var->type, var->name, &state);
158 _mesa_hash_table_insert(var_field_map, var, root_field);
159 }
160
161 return !exec_list_is_empty(&split_vars);
162 }
163
164 static void
165 split_struct_derefs_impl(nir_function_impl *impl,
166 struct hash_table *var_field_map,
167 nir_variable_mode modes,
168 void *mem_ctx)
169 {
170 nir_builder b;
171 nir_builder_init(&b, impl);
172
173 nir_foreach_block(block, impl) {
174 nir_foreach_instr_safe(instr, block) {
175 if (instr->type != nir_instr_type_deref)
176 continue;
177
178 nir_deref_instr *deref = nir_instr_as_deref(instr);
179 if (!(deref->mode & modes))
180 continue;
181
182 /* Clean up any dead derefs we find lying around. They may refer to
183 * variables we're planning to split.
184 */
185 if (nir_deref_instr_remove_if_unused(deref))
186 continue;
187
188 if (!glsl_type_is_vector_or_scalar(deref->type))
189 continue;
190
191 nir_variable *base_var = nir_deref_instr_get_variable(deref);
192 struct hash_entry *entry =
193 _mesa_hash_table_search(var_field_map, base_var);
194 if (!entry)
195 continue;
196
197 struct field *root_field = entry->data;
198
199 nir_deref_path path;
200 nir_deref_path_init(&path, deref, mem_ctx);
201
202 struct field *tail_field = root_field;
203 for (unsigned i = 0; path.path[i]; i++) {
204 if (path.path[i]->deref_type != nir_deref_type_struct)
205 continue;
206
207 assert(i > 0);
208 assert(glsl_type_is_struct(path.path[i - 1]->type));
209 assert(path.path[i - 1]->type ==
210 glsl_without_array(tail_field->type));
211
212 tail_field = &tail_field->fields[path.path[i]->strct.index];
213 }
214 nir_variable *split_var = tail_field->var;
215
216 nir_deref_instr *new_deref = NULL;
217 for (unsigned i = 0; path.path[i]; i++) {
218 nir_deref_instr *p = path.path[i];
219 b.cursor = nir_after_instr(&p->instr);
220
221 switch (p->deref_type) {
222 case nir_deref_type_var:
223 assert(new_deref == NULL);
224 new_deref = nir_build_deref_var(&b, split_var);
225 break;
226
227 case nir_deref_type_array:
228 case nir_deref_type_array_wildcard:
229 new_deref = nir_build_deref_follower(&b, new_deref, p);
230 break;
231
232 case nir_deref_type_struct:
233 /* Nothing to do; we're splitting structs */
234 break;
235
236 default:
237 unreachable("Invalid deref type in path");
238 }
239 }
240
241 assert(new_deref->type == deref->type);
242 nir_ssa_def_rewrite_uses(&deref->dest.ssa,
243 nir_src_for_ssa(&new_deref->dest.ssa));
244 nir_deref_instr_remove_if_unused(deref);
245 }
246 }
247 }
248
249 /** A pass for splitting structs into multiple variables
250 *
251 * This pass splits struct variables (including arrays of structs) into
252 * multiple variables, one for each (possibly nested) structure member.
253 * After this pass completes, no variables of the given mode will contain a
254 * struct type.
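 *
 * As a rough, illustrative sketch of the effect at the GLSL level (not
 * actual NIR; names follow the "%s_%s" scheme used below):
 *
 *    struct S { vec4 color; float weight; };
 *    S s[4];                   // before
 *
 *    vec4  s_color[4];         // after: one variable per (nested) member,
 *    float s_weight[4];        //        re-wrapped in the enclosing arrays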
254 */
255 bool
256 nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
257 {
258 void *mem_ctx = ralloc_context(NULL);
259 struct hash_table *var_field_map =
260 _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
261 _mesa_key_pointer_equal);
262
263 assert((modes & (nir_var_private | nir_var_function)) == modes);
264
265 bool has_global_splits = false;
266 if (modes & nir_var_private) {
267 has_global_splits = split_var_list_structs(shader, NULL,
268 &shader->globals,
269 var_field_map, mem_ctx);
270 }
271
272 bool progress = false;
273 nir_foreach_function(function, shader) {
274 if (!function->impl)
275 continue;
276
277 bool has_local_splits = false;
278 if (modes & nir_var_function) {
279 has_local_splits = split_var_list_structs(shader, function->impl,
280 &function->impl->locals,
281 var_field_map, mem_ctx);
282 }
283
284 if (has_global_splits || has_local_splits) {
285 split_struct_derefs_impl(function->impl, var_field_map,
286 modes, mem_ctx);
287
288 nir_metadata_preserve(function->impl, nir_metadata_block_index |
289 nir_metadata_dominance);
290 progress = true;
291 }
292 }
293
294 ralloc_free(mem_ctx);
295
296 return progress;
297 }
298
299 struct array_level_info {
300 unsigned array_len;
301 bool split;
302 };
303
304 struct array_split {
305 /* Only set if this is the tail end of the splitting */
306 nir_variable *var;
307
308 unsigned num_splits;
309 struct array_split *splits;
310 };
311
312 struct array_var_info {
313 nir_variable *base_var;
314
315 const struct glsl_type *split_var_type;
316
317 bool split_var;
318 struct array_split root_split;
319
320 unsigned num_levels;
321 struct array_level_info levels[0];
322 };
323
324 static bool
325 init_var_list_array_infos(struct exec_list *vars,
326 struct hash_table *var_info_map,
327 void *mem_ctx)
328 {
329 bool has_array = false;
330
331 nir_foreach_variable(var, vars) {
332 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
333 if (num_levels <= 0)
334 continue;
335
336 struct array_var_info *info =
337 rzalloc_size(mem_ctx, sizeof(*info) +
338 num_levels * sizeof(info->levels[0]));
339
340 info->base_var = var;
341 info->num_levels = num_levels;
342
343 const struct glsl_type *type = var->type;
344 for (int i = 0; i < num_levels; i++) {
345 assert(glsl_get_explicit_stride(type) == 0);
346 info->levels[i].array_len = glsl_get_length(type);
347 type = glsl_get_array_element(type);
348
349 /* All levels start out as split */
350 info->levels[i].split = true;
351 }
352
353 _mesa_hash_table_insert(var_info_map, var, info);
354 has_array = true;
355 }
356
357 return has_array;
358 }
359
360 static struct array_var_info *
361 get_array_var_info(nir_variable *var,
362 struct hash_table *var_info_map)
363 {
364 struct hash_entry *entry =
365 _mesa_hash_table_search(var_info_map, var);
366 return entry ? entry->data : NULL;
367 }
368
369 static struct array_var_info *
370 get_array_deref_info(nir_deref_instr *deref,
371 struct hash_table *var_info_map,
372 nir_variable_mode modes)
373 {
374 if (!(deref->mode & modes))
375 return NULL;
376
377 return get_array_var_info(nir_deref_instr_get_variable(deref),
378 var_info_map);
379 }
380
381 static void
382 mark_array_deref_used(nir_deref_instr *deref,
383 struct hash_table *var_info_map,
384 nir_variable_mode modes,
385 void *mem_ctx)
386 {
387 struct array_var_info *info =
388 get_array_deref_info(deref, var_info_map, modes);
389 if (!info)
390 return;
391
392 nir_deref_path path;
393 nir_deref_path_init(&path, deref, mem_ctx);
394
395 /* Walk the path and look for indirects. If we have an array deref with an
396 * indirect, mark the given level as not being split.
397 */
398 for (unsigned i = 0; i < info->num_levels; i++) {
399 nir_deref_instr *p = path.path[i + 1];
400 if (p->deref_type == nir_deref_type_array &&
401 !nir_src_is_const(p->arr.index))
402 info->levels[i].split = false;
403 }
404 }
405
406 static void
407 mark_array_usage_impl(nir_function_impl *impl,
408 struct hash_table *var_info_map,
409 nir_variable_mode modes,
410 void *mem_ctx)
411 {
412 nir_foreach_block(block, impl) {
413 nir_foreach_instr(instr, block) {
414 if (instr->type != nir_instr_type_intrinsic)
415 continue;
416
417 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
418 switch (intrin->intrinsic) {
419 case nir_intrinsic_copy_deref:
420 mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
421 var_info_map, modes, mem_ctx);
422 /* Fall Through */
423
424 case nir_intrinsic_load_deref:
425 case nir_intrinsic_store_deref:
426 mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
427 var_info_map, modes, mem_ctx);
428 break;
429
430 default:
431 break;
432 }
433 }
434 }
435 }
436
437 static void
438 create_split_array_vars(struct array_var_info *var_info,
439 unsigned level,
440 struct array_split *split,
441 const char *name,
442 nir_shader *shader,
443 nir_function_impl *impl,
444 void *mem_ctx)
445 {
446 while (level < var_info->num_levels && !var_info->levels[level].split) {
447 name = ralloc_asprintf(mem_ctx, "%s[*]", name);
448 level++;
449 }
450
451 if (level == var_info->num_levels) {
452 /* We add parens to the variable name so it looks like "(foo[2][*])" so
453 * that further derefs will look like "(foo[2][*])[ssa_6]"
454 */
455 name = ralloc_asprintf(mem_ctx, "(%s)", name);
456
457 nir_variable_mode mode = var_info->base_var->data.mode;
458 if (mode == nir_var_function) {
459 split->var = nir_local_variable_create(impl,
460 var_info->split_var_type, name);
461 } else {
462 split->var = nir_variable_create(shader, mode,
463 var_info->split_var_type, name);
464 }
465 } else {
466 assert(var_info->levels[level].split);
467 split->num_splits = var_info->levels[level].array_len;
468 split->splits = rzalloc_array(mem_ctx, struct array_split,
469 split->num_splits);
470 for (unsigned i = 0; i < split->num_splits; i++) {
471 create_split_array_vars(var_info, level + 1, &split->splits[i],
472 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
473 shader, impl, mem_ctx);
474 }
475 }
476 }
477
478 static bool
479 split_var_list_arrays(nir_shader *shader,
480 nir_function_impl *impl,
481 struct exec_list *vars,
482 struct hash_table *var_info_map,
483 void *mem_ctx)
484 {
485 struct exec_list split_vars;
486 exec_list_make_empty(&split_vars);
487
488 nir_foreach_variable_safe(var, vars) {
489 struct array_var_info *info = get_array_var_info(var, var_info_map);
490 if (!info)
491 continue;
492
493 bool has_split = false;
494 const struct glsl_type *split_type =
495 glsl_without_array_or_matrix(var->type);
496 for (int i = info->num_levels - 1; i >= 0; i--) {
497 if (info->levels[i].split) {
498 has_split = true;
499 continue;
500 }
501
502 /* If the original type was a matrix type, we'd like to keep that so
503 * we don't convert matrices into arrays.
504 */
505 if (i == info->num_levels - 1 &&
506 glsl_type_is_matrix(glsl_without_array(var->type))) {
507 split_type = glsl_matrix_type(glsl_get_base_type(split_type),
508 glsl_get_components(split_type),
509 info->levels[i].array_len);
510 } else {
511 split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
512 }
513 }
514
515 if (has_split) {
516 info->split_var_type = split_type;
517 /* To avoid list confusion (we'll be adding things as we split
518 * variables), pull all of the variables we plan to split off of the
519 * main variable list.
520 */
521 exec_node_remove(&var->node);
522 exec_list_push_tail(&split_vars, &var->node);
523 } else {
524 assert(split_type == var->type);
525 /* If we're not modifying this variable, delete the info so we skip
526 * it faster in later passes.
527 */
528 _mesa_hash_table_remove_key(var_info_map, var);
529 }
530 }
531
532 nir_foreach_variable(var, &split_vars) {
533 struct array_var_info *info = get_array_var_info(var, var_info_map);
534 create_split_array_vars(info, 0, &info->root_split, var->name,
535 shader, impl, mem_ctx);
536 }
537
538 return !exec_list_is_empty(&split_vars);
539 }
540
541 static bool
542 deref_has_split_wildcard(nir_deref_path *path,
543 struct array_var_info *info)
544 {
545 if (info == NULL)
546 return false;
547
548 assert(path->path[0]->var == info->base_var);
549 for (unsigned i = 0; i < info->num_levels; i++) {
550 if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
551 info->levels[i].split)
552 return true;
553 }
554
555 return false;
556 }
557
558 static bool
559 array_path_is_out_of_bounds(nir_deref_path *path,
560 struct array_var_info *info)
561 {
562 if (info == NULL)
563 return false;
564
565 assert(path->path[0]->var == info->base_var);
566 for (unsigned i = 0; i < info->num_levels; i++) {
567 nir_deref_instr *p = path->path[i + 1];
568 if (p->deref_type == nir_deref_type_array_wildcard)
569 continue;
570
571 if (nir_src_is_const(p->arr.index) &&
572 nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
573 return true;
574 }
575
576 return false;
577 }
578
579 static void
580 emit_split_copies(nir_builder *b,
581 struct array_var_info *dst_info, nir_deref_path *dst_path,
582 unsigned dst_level, nir_deref_instr *dst,
583 struct array_var_info *src_info, nir_deref_path *src_path,
584 unsigned src_level, nir_deref_instr *src)
585 {
586 nir_deref_instr *dst_p, *src_p;
587
588 while ((dst_p = dst_path->path[dst_level + 1])) {
589 if (dst_p->deref_type == nir_deref_type_array_wildcard)
590 break;
591
592 dst = nir_build_deref_follower(b, dst, dst_p);
593 dst_level++;
594 }
595
596 while ((src_p = src_path->path[src_level + 1])) {
597 if (src_p->deref_type == nir_deref_type_array_wildcard)
598 break;
599
600 src = nir_build_deref_follower(b, src, src_p);
601 src_level++;
602 }
603
604 if (src_p == NULL || dst_p == NULL) {
605 assert(src_p == NULL && dst_p == NULL);
606 nir_copy_deref(b, dst, src);
607 } else {
608 assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
609 src_p->deref_type == nir_deref_type_array_wildcard);
610
611 if ((dst_info && dst_info->levels[dst_level].split) ||
612 (src_info && src_info->levels[src_level].split)) {
613 /* At least one of the source and destination has no indirects at this
614 * level, so we unroll the copy element-by-element here.
615 */
616 assert(glsl_get_length(dst_path->path[dst_level]->type) ==
617 glsl_get_length(src_path->path[src_level]->type));
618 unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
619 for (unsigned i = 0; i < len; i++) {
620 nir_ssa_def *idx = nir_imm_int(b, i);
621 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
622 nir_build_deref_array(b, dst, idx),
623 src_info, src_path, src_level + 1,
624 nir_build_deref_array(b, src, idx));
625 }
626 } else {
627 /* Neither side is being split so we just keep going */
628 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
629 nir_build_deref_array_wildcard(b, dst),
630 src_info, src_path, src_level + 1,
631 nir_build_deref_array_wildcard(b, src));
632 }
633 }
634 }
635
636 static void
637 split_array_copies_impl(nir_function_impl *impl,
638 struct hash_table *var_info_map,
639 nir_variable_mode modes,
640 void *mem_ctx)
641 {
642 nir_builder b;
643 nir_builder_init(&b, impl);
644
645 nir_foreach_block(block, impl) {
646 nir_foreach_instr_safe(instr, block) {
647 if (instr->type != nir_instr_type_intrinsic)
648 continue;
649
650 nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
651 if (copy->intrinsic != nir_intrinsic_copy_deref)
652 continue;
653
654 nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
655 nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
656
657 struct array_var_info *dst_info =
658 get_array_deref_info(dst_deref, var_info_map, modes);
659 struct array_var_info *src_info =
660 get_array_deref_info(src_deref, var_info_map, modes);
661
662 if (!src_info && !dst_info)
663 continue;
664
665 nir_deref_path dst_path, src_path;
666 nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
667 nir_deref_path_init(&src_path, src_deref, mem_ctx);
668
669 if (!deref_has_split_wildcard(&dst_path, dst_info) &&
670 !deref_has_split_wildcard(&src_path, src_info))
671 continue;
672
673 b.cursor = nir_instr_remove(&copy->instr);
674
675 emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
676 src_info, &src_path, 0, src_path.path[0]);
677 }
678 }
679 }
680
681 static void
682 split_array_access_impl(nir_function_impl *impl,
683 struct hash_table *var_info_map,
684 nir_variable_mode modes,
685 void *mem_ctx)
686 {
687 nir_builder b;
688 nir_builder_init(&b, impl);
689
690 nir_foreach_block(block, impl) {
691 nir_foreach_instr_safe(instr, block) {
692 if (instr->type == nir_instr_type_deref) {
693 /* Clean up any dead derefs we find lying around. They may refer
694 * to variables we're planning to split.
695 */
696 nir_deref_instr *deref = nir_instr_as_deref(instr);
697 if (deref->mode & modes)
698 nir_deref_instr_remove_if_unused(deref);
699 continue;
700 }
701
702 if (instr->type != nir_instr_type_intrinsic)
703 continue;
704
705 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
706 if (intrin->intrinsic != nir_intrinsic_load_deref &&
707 intrin->intrinsic != nir_intrinsic_store_deref &&
708 intrin->intrinsic != nir_intrinsic_copy_deref)
709 continue;
710
711 const unsigned num_derefs =
712 intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
713
714 for (unsigned d = 0; d < num_derefs; d++) {
715 nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
716
717 struct array_var_info *info =
718 get_array_deref_info(deref, var_info_map, modes);
719 if (!info)
720 continue;
721
722 nir_deref_path path;
723 nir_deref_path_init(&path, deref, mem_ctx);
724
725 b.cursor = nir_before_instr(&intrin->instr);
726
727 if (array_path_is_out_of_bounds(&path, info)) {
728 /* If one of the derefs is out-of-bounds, we just delete the
729 * instruction. If a destination is out of bounds, then it may
730 * have been in-bounds prior to shrinking so we don't want to
731 * accidentally stomp something. However, we've already proven
732 * that it will never be read so it's safe to delete. If a
733 * source is out of bounds then it is loading random garbage.
734 * For loads, we replace their uses with an undef instruction
735 * and for copies we just delete the copy since it was writing
736 * undefined garbage anyway and we may as well leave the random
737 * garbage in the destination alone.
738 */
739 if (intrin->intrinsic == nir_intrinsic_load_deref) {
740 nir_ssa_def *u =
741 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
742 intrin->dest.ssa.bit_size);
743 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
744 nir_src_for_ssa(u));
745 }
746 nir_instr_remove(&intrin->instr);
747 for (unsigned i = 0; i < num_derefs; i++)
748 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
749 break;
750 }
751
752 struct array_split *split = &info->root_split;
753 for (unsigned i = 0; i < info->num_levels; i++) {
754 if (info->levels[i].split) {
755 nir_deref_instr *p = path.path[i + 1];
756 unsigned index = nir_src_as_uint(p->arr.index);
757 assert(index < info->levels[i].array_len);
758 split = &split->splits[index];
759 }
760 }
761 assert(!split->splits && split->var);
762
763 nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
764 for (unsigned i = 0; i < info->num_levels; i++) {
765 if (!info->levels[i].split) {
766 new_deref = nir_build_deref_follower(&b, new_deref,
767 path.path[i + 1]);
768 }
769 }
770 assert(new_deref->type == deref->type);
771
772 /* Rewrite the deref source to point to the split one */
773 nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
774 nir_src_for_ssa(&new_deref->dest.ssa));
775 nir_deref_instr_remove_if_unused(deref);
776 }
777 }
778 }
779 }
780
781 /** A pass for splitting arrays of vectors into multiple variables
782 *
783 * This pass looks at arrays (possibly multiple levels) of vectors (not
784 * structures or other types) and tries to split them into piles of variables,
785 * one for each array element. The heuristic used is simple: If a given array
786 * level is never used with an indirect, that array level will get split.
787 *
788 * This pass could probably handle structures easily enough, but making a
789 * pass that could see through an array of structures of arrays would be
790 * difficult, so it's best to just run nir_split_struct_vars first.
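 *
 * As a rough, illustrative sketch at the GLSL level (not actual NIR),
 * assuming a hypothetical array "arr" that is only ever indexed with
 * constants:
 *
 *    vec4 arr[3];                    // before
 *
 *    vec4 arr_0, arr_1, arr_2;       // after (the pass actually names
 *                                    //        these "(arr[0])" and so on)
 *
 * Any level that is ever indexed indirectly is left as an array.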
791 */
792 bool
793 nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
794 {
795 void *mem_ctx = ralloc_context(NULL);
796 struct hash_table *var_info_map =
797 _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
798 _mesa_key_pointer_equal);
799
800 assert((modes & (nir_var_private | nir_var_function)) == modes);
801
802 bool has_global_array = false;
803 if (modes & nir_var_private) {
804 has_global_array = init_var_list_array_infos(&shader->globals,
805 var_info_map, mem_ctx);
806 }
807
808 bool has_any_array = false;
809 nir_foreach_function(function, shader) {
810 if (!function->impl)
811 continue;
812
813 bool has_local_array = false;
814 if (modes & nir_var_function) {
815 has_local_array = init_var_list_array_infos(&function->impl->locals,
816 var_info_map, mem_ctx);
817 }
818
819 if (has_global_array || has_local_array) {
820 has_any_array = true;
821 mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
822 }
823 }
824
825 /* If we failed to find any arrays of vectors, bail early. */
826 if (!has_any_array) {
827 ralloc_free(mem_ctx);
828 return false;
829 }
830
831 bool has_global_splits = false;
832 if (modes & nir_var_private) {
833 has_global_splits = split_var_list_arrays(shader, NULL,
834 &shader->globals,
835 var_info_map, mem_ctx);
836 }
837
838 bool progress = false;
839 nir_foreach_function(function, shader) {
840 if (!function->impl)
841 continue;
842
843 bool has_local_splits = false;
844 if (modes & nir_var_function) {
845 has_local_splits = split_var_list_arrays(shader, function->impl,
846 &function->impl->locals,
847 var_info_map, mem_ctx);
848 }
849
850 if (has_global_splits || has_local_splits) {
851 split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
852 split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
853
854 nir_metadata_preserve(function->impl, nir_metadata_block_index |
855 nir_metadata_dominance);
856 progress = true;
857 }
858 }
859
860 ralloc_free(mem_ctx);
861
862 return progress;
863 }
864
865 struct array_level_usage {
866 unsigned array_len;
867
868 /* The value UINT_MAX will be used to indicate an indirect */
869 unsigned max_read;
870 unsigned max_written;
871
872 /* True if there is a copy that isn't to/from a shrinkable array */
873 bool has_external_copy;
874 struct set *levels_copied;
875 };
876
877 struct vec_var_usage {
878 /* Convenience set of all components this variable has */
879 nir_component_mask_t all_comps;
880
881 nir_component_mask_t comps_read;
882 nir_component_mask_t comps_written;
883
884 nir_component_mask_t comps_kept;
885
886 /* True if there is a copy that isn't to/from a shrinkable vector */
887 bool has_external_copy;
888 struct set *vars_copied;
889
890 unsigned num_levels;
891 struct array_level_usage levels[0];
892 };
893
894 static struct vec_var_usage *
895 get_vec_var_usage(nir_variable *var,
896 struct hash_table *var_usage_map,
897 bool add_usage_entry, void *mem_ctx)
898 {
899 struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
900 if (entry)
901 return entry->data;
902
903 if (!add_usage_entry)
904 return NULL;
905
906 /* Check to make sure that we are working with an array of vectors. We
907 * don't bother to shrink single vectors because we figure that we can
908 * clean it up better with SSA than by inserting piles of vecN instructions
909 * to compact results.
910 */
911 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
912 if (num_levels < 1)
913 return NULL; /* Not an array of vectors */
914
915 struct vec_var_usage *usage =
916 rzalloc_size(mem_ctx, sizeof(*usage) +
917 num_levels * sizeof(usage->levels[0]));
918
919 usage->num_levels = num_levels;
920 const struct glsl_type *type = var->type;
921 for (unsigned i = 0; i < num_levels; i++) {
922 usage->levels[i].array_len = glsl_get_length(type);
923 assert(glsl_get_explicit_stride(type) == 0);
924 type = glsl_get_array_element(type);
925 }
926 assert(glsl_type_is_vector_or_scalar(type));
927
928 usage->all_comps = (1 << glsl_get_components(type)) - 1;
929
930 _mesa_hash_table_insert(var_usage_map, var, usage);
931
932 return usage;
933 }
934
935 static struct vec_var_usage *
936 get_vec_deref_usage(nir_deref_instr *deref,
937 struct hash_table *var_usage_map,
938 nir_variable_mode modes,
939 bool add_usage_entry, void *mem_ctx)
940 {
941 if (!(deref->mode & modes))
942 return NULL;
943
944 return get_vec_var_usage(nir_deref_instr_get_variable(deref),
945 var_usage_map, add_usage_entry, mem_ctx);
946 }
947
948 static void
949 mark_deref_used(nir_deref_instr *deref,
950 nir_component_mask_t comps_read,
951 nir_component_mask_t comps_written,
952 nir_deref_instr *copy_deref,
953 struct hash_table *var_usage_map,
954 nir_variable_mode modes,
955 void *mem_ctx)
956 {
957 if (!(deref->mode & modes))
958 return;
959
960 nir_variable *var = nir_deref_instr_get_variable(deref);
961
962 struct vec_var_usage *usage =
963 get_vec_var_usage(var, var_usage_map, true, mem_ctx);
964 if (!usage)
965 return;
966
967 usage->comps_read |= comps_read & usage->all_comps;
968 usage->comps_written |= comps_written & usage->all_comps;
969
970 struct vec_var_usage *copy_usage = NULL;
971 if (copy_deref) {
972 copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
973 true, mem_ctx);
974 if (copy_usage) {
975 if (usage->vars_copied == NULL) {
976 usage->vars_copied = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
977 _mesa_key_pointer_equal);
978 }
979 _mesa_set_add(usage->vars_copied, copy_usage);
980 } else {
981 usage->has_external_copy = true;
982 }
983 }
984
985 nir_deref_path path;
986 nir_deref_path_init(&path, deref, mem_ctx);
987
988 nir_deref_path copy_path;
989 if (copy_usage)
990 nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
991
992 unsigned copy_i = 0;
993 for (unsigned i = 0; i < usage->num_levels; i++) {
994 struct array_level_usage *level = &usage->levels[i];
995 nir_deref_instr *deref = path.path[i + 1];
996 assert(deref->deref_type == nir_deref_type_array ||
997 deref->deref_type == nir_deref_type_array_wildcard);
998
999 unsigned max_used;
1000 if (deref->deref_type == nir_deref_type_array) {
1001 max_used = nir_src_is_const(deref->arr.index) ?
1002 nir_src_as_uint(deref->arr.index) : UINT_MAX;
1003 } else {
1004 /* For wildcards, we read or wrote the whole thing. */
1005 assert(deref->deref_type == nir_deref_type_array_wildcard);
1006 max_used = level->array_len - 1;
1007
1008 if (copy_usage) {
1009 /* Match each wildcard level with the level on copy_usage */
1010 for (; copy_path.path[copy_i + 1]; copy_i++) {
1011 if (copy_path.path[copy_i + 1]->deref_type ==
1012 nir_deref_type_array_wildcard)
1013 break;
1014 }
1015 struct array_level_usage *copy_level =
1016 &copy_usage->levels[copy_i++];
1017
1018 if (level->levels_copied == NULL) {
1019 level->levels_copied =
1020 _mesa_set_create(mem_ctx, _mesa_hash_pointer,
1021 _mesa_key_pointer_equal);
1022 }
1023 _mesa_set_add(level->levels_copied, copy_level);
1024 } else {
1025 /* We have a wildcard and it comes from a variable we aren't
1026 * tracking; flag it and we'll know to not shorten this array.
1027 */
1028 level->has_external_copy = true;
1029 }
1030 }
1031
1032 if (comps_written)
1033 level->max_written = MAX2(level->max_written, max_used);
1034 if (comps_read)
1035 level->max_read = MAX2(level->max_read, max_used);
1036 }
1037 }
1038
1039 static bool
1040 src_is_load_deref(nir_src src, nir_src deref_src)
1041 {
1042 assert(src.is_ssa);
1043 assert(deref_src.is_ssa);
1044
1045 if (src.ssa->parent_instr->type != nir_instr_type_intrinsic)
1046 return false;
1047
1048 nir_intrinsic_instr *load = nir_instr_as_intrinsic(src.ssa->parent_instr);
1049 if (load->intrinsic != nir_intrinsic_load_deref)
1050 return false;
1051
1052 assert(load->src[0].is_ssa);
1053
1054 return load->src[0].ssa == deref_src.ssa;
1055 }
1056
1057 /* Returns all non-self-referential components of a store instruction. A
1058 * component is self-referential if it comes from the same component of a load
1059 * instruction on the same deref. If the only data in a particular component
1060 * of a variable came directly from that component then it's undefined. The
1061 * only way to get defined data into a component of a variable is for it to
1062 * get written there by something outside or from a different component.
1063 *
1064 * This is a fairly common pattern in shaders that come from either GLSL IR or
1065 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1066 * load-vec-store.
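 *
 * A minimal sketch of that pattern in NIR-like pseudocode (not exact
 * syntax; "v", "t" and "y" are hypothetical):
 *
 *    vec4 t = load_deref(v);                        // read vector back
 *    store_deref(v, vec4(t.x, y, t.z, t.w), 0xf);   // masked write of .y
 *
 * Components 0, 2 and 3 only re-store what was loaded from "v", so the
 * returned mask keeps just bit 1.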
1067 */
1068 static nir_component_mask_t
1069 get_non_self_referential_store_comps(nir_intrinsic_instr *store)
1070 {
1071 nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1072
1073 assert(store->src[1].is_ssa);
1074 nir_instr *src_instr = store->src[1].ssa->parent_instr;
1075 if (src_instr->type != nir_instr_type_alu)
1076 return comps;
1077
1078 nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1079
1080 if (src_alu->op == nir_op_imov ||
1081 src_alu->op == nir_op_fmov) {
1082 /* If it's just a swizzle of a load from the same deref, discount any
1083 * channels that don't move in the swizzle.
1084 */
1085 if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1086 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1087 if (src_alu->src[0].swizzle[i] == i)
1088 comps &= ~(1u << i);
1089 }
1090 }
1091 } else if (src_alu->op == nir_op_vec2 ||
1092 src_alu->op == nir_op_vec3 ||
1093 src_alu->op == nir_op_vec4) {
1094 /* If it's a vec, discount any channels that are just loads from the
1095 * same deref put in the same spot.
1096 */
1097 for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1098 if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1099 src_alu->src[i].swizzle[0] == i)
1100 comps &= ~(1u << i);
1101 }
1102 }
1103
1104 return comps;
1105 }
1106
1107 static void
1108 find_used_components_impl(nir_function_impl *impl,
1109 struct hash_table *var_usage_map,
1110 nir_variable_mode modes,
1111 void *mem_ctx)
1112 {
1113 nir_foreach_block(block, impl) {
1114 nir_foreach_instr(instr, block) {
1115 if (instr->type != nir_instr_type_intrinsic)
1116 continue;
1117
1118 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1119 switch (intrin->intrinsic) {
1120 case nir_intrinsic_load_deref:
1121 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1122 nir_ssa_def_components_read(&intrin->dest.ssa), 0,
1123 NULL, var_usage_map, modes, mem_ctx);
1124 break;
1125
1126 case nir_intrinsic_store_deref:
1127 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1128 0, get_non_self_referential_store_comps(intrin),
1129 NULL, var_usage_map, modes, mem_ctx);
1130 break;
1131
1132 case nir_intrinsic_copy_deref: {
1133 /* Just mark everything used for copies. */
1134 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1135 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1136 mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1137 mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1138 break;
1139 }
1140
1141 default:
1142 break;
1143 }
1144 }
1145 }
1146 }
1147
1148 static bool
1149 shrink_vec_var_list(struct exec_list *vars,
1150 struct hash_table *var_usage_map)
1151 {
1152 /* Initialize the components kept field of each variable. This is the
1153 * AND of the components written and components read. If a component is
1154 * written but never read, it's dead. If it is read but never written,
1155 * then all values read are undefined garbage and we may as well not read
1156 * them.
1157 *
1158 * The same logic applies to the array length. We make the array length
1159 * the minimum length required between reads and writes and plan to
1160 * discard any OOB access. The one exception here is indirect writes
1161 * because we don't know where they will land and we can't shrink an array
1162 * with indirect writes because previously in-bounds writes may become
1163 * out-of-bounds and have undefined behavior.
1164 *
1165 * Also, if we have a copy to/from something we can't shrink, we need to
1166 * leave the components and array_len of any wildcards alone.
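 *
 * A hypothetical worked example: for a vec4 with comps_read = 0x5 (xz) and
 * comps_written = 0x7 (xyz), comps_kept = 0x5 and the variable shrinks to a
 * vec2. For a level with array_len 8, max_read 7, max_written 3 and no
 * indirect writes, the new array_len becomes MIN2(7, 3) + 1 = 4.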
1167 */
1168 nir_foreach_variable(var, vars) {
1169 struct vec_var_usage *usage =
1170 get_vec_var_usage(var, var_usage_map, false, NULL);
1171 if (!usage)
1172 continue;
1173
1174 assert(usage->comps_kept == 0);
1175 if (usage->has_external_copy)
1176 usage->comps_kept = usage->all_comps;
1177 else
1178 usage->comps_kept = usage->comps_read & usage->comps_written;
1179
1180 for (unsigned i = 0; i < usage->num_levels; i++) {
1181 struct array_level_usage *level = &usage->levels[i];
1182 assert(level->array_len > 0);
1183
1184 if (level->max_written == UINT_MAX || level->has_external_copy)
1185 continue; /* Can't shrink */
1186
1187 unsigned max_used = MIN2(level->max_read, level->max_written);
1188 level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1189 }
1190 }
1191
1192 /* In order for variable copies to work, we have to have the same data type
1193 * on the source and the destination. In order to satisfy this, we run a
1194 * little fixed-point algorithm to transitively ensure that we get enough
1195 * components and array elements for this to hold for all copies.
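 *
 * For example (hypothetical), if variable A is copied to B and B to C, the
 * loop converges with all three keeping the union of their comps_kept masks
 * and the maximum array_len at each wildcard-copied level.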
1196 */
1197 bool fp_progress;
1198 do {
1199 fp_progress = false;
1200 nir_foreach_variable(var, vars) {
1201 struct vec_var_usage *var_usage =
1202 get_vec_var_usage(var, var_usage_map, false, NULL);
1203 if (!var_usage || !var_usage->vars_copied)
1204 continue;
1205
1206 set_foreach(var_usage->vars_copied, copy_entry) {
1207 struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1208 if (copy_usage->comps_kept != var_usage->comps_kept) {
1209 nir_component_mask_t comps_kept =
1210 (var_usage->comps_kept | copy_usage->comps_kept);
1211 var_usage->comps_kept = comps_kept;
1212 copy_usage->comps_kept = comps_kept;
1213 fp_progress = true;
1214 }
1215 }
1216
1217 for (unsigned i = 0; i < var_usage->num_levels; i++) {
1218 struct array_level_usage *var_level = &var_usage->levels[i];
1219 if (!var_level->levels_copied)
1220 continue;
1221
1222 set_foreach(var_level->levels_copied, copy_entry) {
1223 struct array_level_usage *copy_level = (void *)copy_entry->key;
1224 if (var_level->array_len != copy_level->array_len) {
1225 unsigned array_len =
1226 MAX2(var_level->array_len, copy_level->array_len);
1227 var_level->array_len = array_len;
1228 copy_level->array_len = array_len;
1229 fp_progress = true;
1230 }
1231 }
1232 }
1233 }
1234 } while (fp_progress);
1235
1236 bool vars_shrunk = false;
1237 nir_foreach_variable_safe(var, vars) {
1238 struct vec_var_usage *usage =
1239 get_vec_var_usage(var, var_usage_map, false, NULL);
1240 if (!usage)
1241 continue;
1242
1243 bool shrunk = false;
1244 const struct glsl_type *vec_type = var->type;
1245 for (unsigned i = 0; i < usage->num_levels; i++) {
1246 /* If we've reduced the array to zero elements at some level, just
1247 * set comps_kept to 0 and delete the variable.
1248 */
1249 if (usage->levels[i].array_len == 0) {
1250 usage->comps_kept = 0;
1251 break;
1252 }
1253
1254 assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1255 if (usage->levels[i].array_len < glsl_get_length(vec_type))
1256 shrunk = true;
1257 vec_type = glsl_get_array_element(vec_type);
1258 }
1259 assert(glsl_type_is_vector_or_scalar(vec_type));
1260
1261 assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1262 if (usage->comps_kept != usage->all_comps)
1263 shrunk = true;
1264
1265 if (usage->comps_kept == 0) {
1266 /* This variable is dead, remove it */
1267 vars_shrunk = true;
1268 exec_node_remove(&var->node);
1269 continue;
1270 }
1271
1272 if (!shrunk) {
1273 /* This variable doesn't need to be shrunk. Remove it from the
1274 * hash table so later steps will ignore it.
1275 */
1276 _mesa_hash_table_remove_key(var_usage_map, var);
1277 continue;
1278 }
1279
1280 /* Build the new var type */
1281 unsigned new_num_comps = util_bitcount(usage->comps_kept);
1282 const struct glsl_type *new_type =
1283 glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1284 for (int i = usage->num_levels - 1; i >= 0; i--) {
1285 assert(usage->levels[i].array_len > 0);
1286 /* If the original type was a matrix type, we'd like to keep that so
1287 * we don't convert matrices into arrays.
1288 */
1289 if (i == usage->num_levels - 1 &&
1290 glsl_type_is_matrix(glsl_without_array(var->type)) &&
1291 new_num_comps > 1 && usage->levels[i].array_len > 1) {
1292 new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1293 new_num_comps,
1294 usage->levels[i].array_len);
1295 } else {
1296 new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
1297 }
1298 }
1299 var->type = new_type;
1300
1301 vars_shrunk = true;
1302 }
1303
1304 return vars_shrunk;
1305 }
1306
1307 static bool
1308 vec_deref_is_oob(nir_deref_instr *deref,
1309 struct vec_var_usage *usage)
1310 {
1311 nir_deref_path path;
1312 nir_deref_path_init(&path, deref, NULL);
1313
1314 bool oob = false;
1315 for (unsigned i = 0; i < usage->num_levels; i++) {
1316 nir_deref_instr *p = path.path[i + 1];
1317 if (p->deref_type == nir_deref_type_array_wildcard)
1318 continue;
1319
1320 if (nir_src_is_const(p->arr.index) &&
1321 nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1322 oob = true;
1323 break;
1324 }
1325 }
1326
1327 nir_deref_path_finish(&path);
1328
1329 return oob;
1330 }
1331
1332 static bool
1333 vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1334 struct hash_table *var_usage_map,
1335 nir_variable_mode modes)
1336 {
1337 struct vec_var_usage *usage =
1338 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1339 if (!usage)
1340 return false;
1341
1342 return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1343 }
1344
1345 static void
1346 shrink_vec_var_access_impl(nir_function_impl *impl,
1347 struct hash_table *var_usage_map,
1348 nir_variable_mode modes)
1349 {
1350 nir_builder b;
1351 nir_builder_init(&b, impl);
1352
1353 nir_foreach_block(block, impl) {
1354 nir_foreach_instr_safe(instr, block) {
1355 switch (instr->type) {
1356 case nir_instr_type_deref: {
1357 nir_deref_instr *deref = nir_instr_as_deref(instr);
1358 if (!(deref->mode & modes))
1359 break;
1360
1361 /* Clean up any dead derefs we find lying around. They may refer
1362 * to variables we've deleted.
1363 */
1364 if (nir_deref_instr_remove_if_unused(deref))
1365 break;
1366
1367 /* Update the type in the deref to keep the types consistent as
1368 * you walk down the chain. We don't need to check if this is one
1369 * of the derefs we're shrinking because this is a no-op if it
1370 * isn't. The worst that could happen is that we accidentally fix
1371 * an invalid deref.
1372 */
1373 if (deref->deref_type == nir_deref_type_var) {
1374 deref->type = deref->var->type;
1375 } else if (deref->deref_type == nir_deref_type_array ||
1376 deref->deref_type == nir_deref_type_array_wildcard) {
1377 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1378 assert(glsl_type_is_array(parent->type) ||
1379 glsl_type_is_matrix(parent->type));
1380 deref->type = glsl_get_array_element(parent->type);
1381 }
1382 break;
1383 }
1384
1385 case nir_instr_type_intrinsic: {
1386 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1387
1388 /* If we have a copy whose source or destination has been deleted
1389 * because we determined the variable was dead, then we just
1390 * delete the copy instruction. If the source variable was dead
1391 * then it was writing undefined garbage anyway and if it's the
1392 * destination variable that's dead then the write isn't needed.
1393 */
1394 if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1395 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1396 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1397 if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1398 vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1399 nir_instr_remove(&intrin->instr);
1400 nir_deref_instr_remove_if_unused(dst);
1401 nir_deref_instr_remove_if_unused(src);
1402 }
1403 continue;
1404 }
1405
1406 if (intrin->intrinsic != nir_intrinsic_load_deref &&
1407 intrin->intrinsic != nir_intrinsic_store_deref)
1408 continue;
1409
1410 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1411 if (!(deref->mode & modes))
1412 continue;
1413
1414 struct vec_var_usage *usage =
1415 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1416 if (!usage)
1417 continue;
1418
1419 if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1420 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1421 nir_ssa_def *u =
1422 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
1423 intrin->dest.ssa.bit_size);
1424 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
1425 nir_src_for_ssa(u));
1426 }
1427 nir_instr_remove(&intrin->instr);
1428 nir_deref_instr_remove_if_unused(deref);
1429 continue;
1430 }
1431
1432 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1433 b.cursor = nir_after_instr(&intrin->instr);
1434
1435 nir_ssa_def *undef =
1436 nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
1437 nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1438 unsigned c = 0;
1439 for (unsigned i = 0; i < intrin->num_components; i++) {
1440 if (usage->comps_kept & (1u << i))
1441 vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
1442 else
1443 vec_srcs[i] = undef;
1444 }
1445 nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1446
1447 nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
1448 nir_src_for_ssa(vec),
1449 vec->parent_instr);
1450
1451 /* The SSA def is now only used by the swizzle. It's safe to
1452 * shrink the number of components.
1453 */
1454 assert(list_length(&intrin->dest.ssa.uses) == c);
1455 intrin->num_components = c;
1456 intrin->dest.ssa.num_components = c;
1457 } else {
1458 nir_component_mask_t write_mask =
1459 nir_intrinsic_write_mask(intrin);
1460
1461 unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1462 nir_component_mask_t new_write_mask = 0;
1463 unsigned c = 0;
1464 for (unsigned i = 0; i < intrin->num_components; i++) {
1465 if (usage->comps_kept & (1u << i)) {
1466 swizzle[c] = i;
1467 if (write_mask & (1u << i))
1468 new_write_mask |= 1u << c;
1469 c++;
1470 }
1471 }
1472
1473 b.cursor = nir_before_instr(&intrin->instr);
1474
1475 nir_ssa_def *swizzled =
1476 nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false);
1477
1478 /* Rewrite to use the compacted source */
1479 nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
1480 nir_src_for_ssa(swizzled));
1481 nir_intrinsic_set_write_mask(intrin, new_write_mask);
1482 intrin->num_components = c;
1483 }
1484 break;
1485 }
1486
1487 default:
1488 break;
1489 }
1490 }
1491 }
1492 }
1493
1494 static bool
1495 function_impl_has_vars_with_modes(nir_function_impl *impl,
1496 nir_variable_mode modes)
1497 {
1498 nir_shader *shader = impl->function->shader;
1499
1500 if ((modes & nir_var_private) && !exec_list_is_empty(&shader->globals))
1501 return true;
1502
1503 if ((modes & nir_var_function) && !exec_list_is_empty(&impl->locals))
1504 return true;
1505
1506 return false;
1507 }
1508
1509 /** Attempt to shrink arrays of vectors
1510 *
1511 * This pass looks at variables which contain a vector or an array (possibly
1512 * multiple dimensions) of vectors and attempts to lower them to a smaller vector
1513 * or array. If the pass can prove that a component of a vector (or array of
1514 * vectors) is never really used, then that component will be removed.
1515 * Similarly, the pass attempts to shorten arrays based on what elements it
1516 * can prove are never read or never contain valid data.
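 *
 * As a rough, illustrative sketch at the GLSL level (not actual NIR),
 * assuming only the .xy components and the first two elements of a
 * hypothetical "data" are ever both read and written:
 *
 *    vec4 data[8];        // before
 *    vec2 data[2];        // after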
1517 */
1518 bool
1519 nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1520 {
1521 assert((modes & (nir_var_private | nir_var_function)) == modes);
1522
1523 void *mem_ctx = ralloc_context(NULL);
1524
1525 struct hash_table *var_usage_map =
1526 _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
1527 _mesa_key_pointer_equal);
1528
1529 bool has_vars_to_shrink = false;
1530 nir_foreach_function(function, shader) {
1531 if (!function->impl)
1532 continue;
1533
1534 /* Don't even bother crawling the IR if we don't have any variables.
1535 * Given that this pass deletes any unused variables, it's likely that
1536 * we will be in this scenario eventually.
1537 */
1538 if (function_impl_has_vars_with_modes(function->impl, modes)) {
1539 has_vars_to_shrink = true;
1540 find_used_components_impl(function->impl, var_usage_map,
1541 modes, mem_ctx);
1542 }
1543 }
1544 if (!has_vars_to_shrink) {
1545 ralloc_free(mem_ctx);
1546 return false;
1547 }
1548
1549 bool globals_shrunk = false;
1550 if (modes & nir_var_private)
1551 globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map);
1552
1553 bool progress = false;
1554 nir_foreach_function(function, shader) {
1555 if (!function->impl)
1556 continue;
1557
1558 bool locals_shrunk = false;
1559 if (modes & nir_var_function) {
1560 locals_shrunk = shrink_vec_var_list(&function->impl->locals,
1561 var_usage_map);
1562 }
1563
1564 if (globals_shrunk || locals_shrunk) {
1565 shrink_vec_var_access_impl(function->impl, var_usage_map, modes);
1566
1567 nir_metadata_preserve(function->impl, nir_metadata_block_index |
1568 nir_metadata_dominance);
1569 progress = true;
1570 }
1571 }
1572
1573 ralloc_free(mem_ctx);
1574
1575 return progress;
1576 }