2 * Copyright © 2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "nir_builder.h"
26 #include "nir_deref.h"
30 #include "util/u_math.h"
33 get_complex_used_vars(nir_shader
*shader
, void *mem_ctx
)
35 struct set
*complex_vars
= _mesa_pointer_set_create(mem_ctx
);
37 nir_foreach_function(function
, shader
) {
41 nir_foreach_block(block
, function
->impl
) {
42 nir_foreach_instr(instr
, block
) {
43 if (instr
->type
!= nir_instr_type_deref
)
46 nir_deref_instr
*deref
= nir_instr_as_deref(instr
);
48 /* We only need to consider var derefs because
49 * nir_deref_instr_has_complex_use is recursive.
51 if (deref
->deref_type
== nir_deref_type_var
&&
52 nir_deref_instr_has_complex_use(deref
))
53 _mesa_set_add(complex_vars
, deref
->var
);
61 struct split_var_state
{
65 nir_function_impl
*impl
;
67 nir_variable
*base_var
;
73 const struct glsl_type
*type
;
/* Re-applies the array nesting of array_type around type.
 *
 * If array_type is not an array, type is returned unchanged.  Otherwise the
 * element type is wrapped recursively and the result is placed in an array
 * of the same length as array_type (explicit stride 0).
 *
 * \param type        innermost type to wrap
 * \param array_type  type whose array dimensions are copied
 * \return type wrapped in the array dimensions of array_type
 */
static const struct glsl_type *
wrap_type_in_array(const struct glsl_type *type,
                   const struct glsl_type *array_type)
{
   if (!glsl_type_is_array(array_type))
      return type;

   const struct glsl_type *elem_type =
      wrap_type_in_array(type, glsl_get_array_element(array_type));
   /* Only arrays without an explicit stride are expected here */
   assert(glsl_get_explicit_stride(array_type) == 0);
   return glsl_array_type(elem_type, glsl_get_length(array_type), 0);
}
/* Counts the array/matrix levels wrapped around a vector or scalar.
 *
 * \param type  type to inspect
 * \return the number of array-or-matrix levels stripped before reaching a
 *         vector or scalar, or -1 if the innermost type is something else
 */
static int
num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
{
   int num_levels = 0;
   while (true) {
      if (glsl_type_is_array_or_matrix(type)) {
         num_levels++;
         type = glsl_get_array_element(type);
      } else if (glsl_type_is_vector_or_scalar(type)) {
         return num_levels;
      } else {
         /* Not an array of vectors */
         return -1;
      }
   }
}
112 init_field_for_type(struct field
*field
, struct field
*parent
,
113 const struct glsl_type
*type
,
115 struct split_var_state
*state
)
117 *field
= (struct field
) {
122 const struct glsl_type
*struct_type
= glsl_without_array(type
);
123 if (glsl_type_is_struct_or_ifc(struct_type
)) {
124 field
->num_fields
= glsl_get_length(struct_type
),
125 field
->fields
= ralloc_array(state
->mem_ctx
, struct field
,
127 for (unsigned i
= 0; i
< field
->num_fields
; i
++) {
128 char *field_name
= NULL
;
130 field_name
= ralloc_asprintf(state
->mem_ctx
, "%s_%s", name
,
131 glsl_get_struct_elem_name(struct_type
, i
));
133 field_name
= ralloc_asprintf(state
->mem_ctx
, "{unnamed %s}_%s",
134 glsl_get_type_name(struct_type
),
135 glsl_get_struct_elem_name(struct_type
, i
));
137 init_field_for_type(&field
->fields
[i
], field
,
138 glsl_get_struct_field(struct_type
, i
),
142 const struct glsl_type
*var_type
= type
;
143 for (struct field
*f
= field
->parent
; f
; f
= f
->parent
)
144 var_type
= wrap_type_in_array(var_type
, f
->type
);
146 nir_variable_mode mode
= state
->base_var
->data
.mode
;
147 if (mode
== nir_var_function_temp
) {
148 field
->var
= nir_local_variable_create(state
->impl
, var_type
, name
);
150 field
->var
= nir_variable_create(state
->shader
, mode
, var_type
, name
);
156 split_var_list_structs(nir_shader
*shader
,
157 nir_function_impl
*impl
,
158 struct exec_list
*vars
,
159 nir_variable_mode mode
,
160 struct hash_table
*var_field_map
,
161 struct set
**complex_vars
,
164 struct split_var_state state
= {
170 struct exec_list split_vars
;
171 exec_list_make_empty(&split_vars
);
173 /* To avoid list confusion (we'll be adding things as we split variables),
174 * pull all of the variables we plan to split off of the list
176 nir_foreach_variable_in_list_safe(var
, vars
) {
177 if (var
->data
.mode
!= mode
)
180 if (!glsl_type_is_struct_or_ifc(glsl_without_array(var
->type
)))
183 if (*complex_vars
== NULL
)
184 *complex_vars
= get_complex_used_vars(shader
, mem_ctx
);
186 /* We can't split a variable that's referenced with deref that has any
187 * sort of complex usage.
189 if (_mesa_set_search(*complex_vars
, var
))
192 exec_node_remove(&var
->node
);
193 exec_list_push_tail(&split_vars
, &var
->node
);
196 nir_foreach_variable_in_list(var
, &split_vars
) {
197 state
.base_var
= var
;
199 struct field
*root_field
= ralloc(mem_ctx
, struct field
);
200 init_field_for_type(root_field
, NULL
, var
->type
, var
->name
, &state
);
201 _mesa_hash_table_insert(var_field_map
, var
, root_field
);
204 return !exec_list_is_empty(&split_vars
);
208 split_struct_derefs_impl(nir_function_impl
*impl
,
209 struct hash_table
*var_field_map
,
210 nir_variable_mode modes
,
214 nir_builder_init(&b
, impl
);
216 nir_foreach_block(block
, impl
) {
217 nir_foreach_instr_safe(instr
, block
) {
218 if (instr
->type
!= nir_instr_type_deref
)
221 nir_deref_instr
*deref
= nir_instr_as_deref(instr
);
222 if (!(deref
->mode
& modes
))
225 /* Clean up any dead derefs we find lying around. They may refer to
226 * variables we're planning to split.
228 if (nir_deref_instr_remove_if_unused(deref
))
231 if (!glsl_type_is_vector_or_scalar(deref
->type
))
234 nir_variable
*base_var
= nir_deref_instr_get_variable(deref
);
235 struct hash_entry
*entry
=
236 _mesa_hash_table_search(var_field_map
, base_var
);
240 struct field
*root_field
= entry
->data
;
243 nir_deref_path_init(&path
, deref
, mem_ctx
);
245 struct field
*tail_field
= root_field
;
246 for (unsigned i
= 0; path
.path
[i
]; i
++) {
247 if (path
.path
[i
]->deref_type
!= nir_deref_type_struct
)
251 assert(glsl_type_is_struct_or_ifc(path
.path
[i
- 1]->type
));
252 assert(path
.path
[i
- 1]->type
==
253 glsl_without_array(tail_field
->type
));
255 tail_field
= &tail_field
->fields
[path
.path
[i
]->strct
.index
];
257 nir_variable
*split_var
= tail_field
->var
;
259 nir_deref_instr
*new_deref
= NULL
;
260 for (unsigned i
= 0; path
.path
[i
]; i
++) {
261 nir_deref_instr
*p
= path
.path
[i
];
262 b
.cursor
= nir_after_instr(&p
->instr
);
264 switch (p
->deref_type
) {
265 case nir_deref_type_var
:
266 assert(new_deref
== NULL
);
267 new_deref
= nir_build_deref_var(&b
, split_var
);
270 case nir_deref_type_array
:
271 case nir_deref_type_array_wildcard
:
272 new_deref
= nir_build_deref_follower(&b
, new_deref
, p
);
275 case nir_deref_type_struct
:
276 /* Nothing to do; we're splitting structs */
280 unreachable("Invalid deref type in path");
284 assert(new_deref
->type
== deref
->type
);
285 nir_ssa_def_rewrite_uses(&deref
->dest
.ssa
,
286 nir_src_for_ssa(&new_deref
->dest
.ssa
));
287 nir_deref_instr_remove_if_unused(deref
);
292 /** A pass for splitting structs into multiple variables
294 * This pass splits arrays of structs into multiple variables, one for each
295 * (possibly nested) structure member. After this pass completes, no
296 * variables of the given mode will contain a struct type.
299 nir_split_struct_vars(nir_shader
*shader
, nir_variable_mode modes
)
301 void *mem_ctx
= ralloc_context(NULL
);
302 struct hash_table
*var_field_map
=
303 _mesa_pointer_hash_table_create(mem_ctx
);
304 struct set
*complex_vars
= NULL
;
306 assert((modes
& (nir_var_shader_temp
| nir_var_function_temp
)) == modes
);
308 bool has_global_splits
= false;
309 if (modes
& nir_var_shader_temp
) {
310 has_global_splits
= split_var_list_structs(shader
, NULL
,
318 bool progress
= false;
319 nir_foreach_function(function
, shader
) {
323 bool has_local_splits
= false;
324 if (modes
& nir_var_function_temp
) {
325 has_local_splits
= split_var_list_structs(shader
, function
->impl
,
326 &function
->impl
->locals
,
327 nir_var_function_temp
,
333 if (has_global_splits
|| has_local_splits
) {
334 split_struct_derefs_impl(function
->impl
, var_field_map
,
337 nir_metadata_preserve(function
->impl
, nir_metadata_block_index
|
338 nir_metadata_dominance
);
341 nir_metadata_preserve(function
->impl
, nir_metadata_all
);
345 ralloc_free(mem_ctx
);
/* Per-array-level bookkeeping for the array splitting pass. */
struct array_level_info {
   /* Number of elements at this array level */
   unsigned array_len;
   /* True while this level is still a candidate for splitting; cleared when
    * an indirect access to the level is found.
    */
   bool split;
};
356 /* Only set if this is the tail end of the splitting */
360 struct array_split
*splits
;
363 struct array_var_info
{
364 nir_variable
*base_var
;
366 const struct glsl_type
*split_var_type
;
369 struct array_split root_split
;
372 struct array_level_info levels
[0];
376 init_var_list_array_infos(nir_shader
*shader
,
377 struct exec_list
*vars
,
378 nir_variable_mode mode
,
379 struct hash_table
*var_info_map
,
380 struct set
**complex_vars
,
383 bool has_array
= false;
385 nir_foreach_variable_in_list(var
, vars
) {
386 if (var
->data
.mode
!= mode
)
389 int num_levels
= num_array_levels_in_array_of_vector_type(var
->type
);
393 if (*complex_vars
== NULL
)
394 *complex_vars
= get_complex_used_vars(shader
, mem_ctx
);
396 /* We can't split a variable that's referenced with deref that has any
397 * sort of complex usage.
399 if (_mesa_set_search(*complex_vars
, var
))
402 struct array_var_info
*info
=
403 rzalloc_size(mem_ctx
, sizeof(*info
) +
404 num_levels
* sizeof(info
->levels
[0]));
406 info
->base_var
= var
;
407 info
->num_levels
= num_levels
;
409 const struct glsl_type
*type
= var
->type
;
410 for (int i
= 0; i
< num_levels
; i
++) {
411 info
->levels
[i
].array_len
= glsl_get_length(type
);
412 type
= glsl_get_array_element(type
);
414 /* All levels start out initially as split */
415 info
->levels
[i
].split
= true;
418 _mesa_hash_table_insert(var_info_map
, var
, info
);
425 static struct array_var_info
*
426 get_array_var_info(nir_variable
*var
,
427 struct hash_table
*var_info_map
)
429 struct hash_entry
*entry
=
430 _mesa_hash_table_search(var_info_map
, var
);
431 return entry
? entry
->data
: NULL
;
434 static struct array_var_info
*
435 get_array_deref_info(nir_deref_instr
*deref
,
436 struct hash_table
*var_info_map
,
437 nir_variable_mode modes
)
439 if (!(deref
->mode
& modes
))
442 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
446 return get_array_var_info(var
, var_info_map
);
450 mark_array_deref_used(nir_deref_instr
*deref
,
451 struct hash_table
*var_info_map
,
452 nir_variable_mode modes
,
455 struct array_var_info
*info
=
456 get_array_deref_info(deref
, var_info_map
, modes
);
461 nir_deref_path_init(&path
, deref
, mem_ctx
);
463 /* Walk the path and look for indirects. If we have an array deref with an
464 * indirect, mark the given level as not being split.
466 for (unsigned i
= 0; i
< info
->num_levels
; i
++) {
467 nir_deref_instr
*p
= path
.path
[i
+ 1];
468 if (p
->deref_type
== nir_deref_type_array
&&
469 !nir_src_is_const(p
->arr
.index
))
470 info
->levels
[i
].split
= false;
475 mark_array_usage_impl(nir_function_impl
*impl
,
476 struct hash_table
*var_info_map
,
477 nir_variable_mode modes
,
480 nir_foreach_block(block
, impl
) {
481 nir_foreach_instr(instr
, block
) {
482 if (instr
->type
!= nir_instr_type_intrinsic
)
485 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
486 switch (intrin
->intrinsic
) {
487 case nir_intrinsic_copy_deref
:
488 mark_array_deref_used(nir_src_as_deref(intrin
->src
[1]),
489 var_info_map
, modes
, mem_ctx
);
492 case nir_intrinsic_load_deref
:
493 case nir_intrinsic_store_deref
:
494 mark_array_deref_used(nir_src_as_deref(intrin
->src
[0]),
495 var_info_map
, modes
, mem_ctx
);
506 create_split_array_vars(struct array_var_info
*var_info
,
508 struct array_split
*split
,
511 nir_function_impl
*impl
,
514 while (level
< var_info
->num_levels
&& !var_info
->levels
[level
].split
) {
515 name
= ralloc_asprintf(mem_ctx
, "%s[*]", name
);
519 if (level
== var_info
->num_levels
) {
520 /* We add parens to the variable name so it looks like "(foo[2][*])" so
521 * that further derefs will look like "(foo[2][*])[ssa_6]"
523 name
= ralloc_asprintf(mem_ctx
, "(%s)", name
);
525 nir_variable_mode mode
= var_info
->base_var
->data
.mode
;
526 if (mode
== nir_var_function_temp
) {
527 split
->var
= nir_local_variable_create(impl
,
528 var_info
->split_var_type
, name
);
530 split
->var
= nir_variable_create(shader
, mode
,
531 var_info
->split_var_type
, name
);
534 assert(var_info
->levels
[level
].split
);
535 split
->num_splits
= var_info
->levels
[level
].array_len
;
536 split
->splits
= rzalloc_array(mem_ctx
, struct array_split
,
538 for (unsigned i
= 0; i
< split
->num_splits
; i
++) {
539 create_split_array_vars(var_info
, level
+ 1, &split
->splits
[i
],
540 ralloc_asprintf(mem_ctx
, "%s[%d]", name
, i
),
541 shader
, impl
, mem_ctx
);
547 split_var_list_arrays(nir_shader
*shader
,
548 nir_function_impl
*impl
,
549 struct exec_list
*vars
,
550 nir_variable_mode mode
,
551 struct hash_table
*var_info_map
,
554 struct exec_list split_vars
;
555 exec_list_make_empty(&split_vars
);
557 nir_foreach_variable_in_list_safe(var
, vars
) {
558 if (var
->data
.mode
!= mode
)
561 struct array_var_info
*info
= get_array_var_info(var
, var_info_map
);
565 bool has_split
= false;
566 const struct glsl_type
*split_type
=
567 glsl_without_array_or_matrix(var
->type
);
568 for (int i
= info
->num_levels
- 1; i
>= 0; i
--) {
569 if (info
->levels
[i
].split
) {
574 /* If the original type was a matrix type, we'd like to keep that so
575 * we don't convert matrices into arrays.
577 if (i
== info
->num_levels
- 1 &&
578 glsl_type_is_matrix(glsl_without_array(var
->type
))) {
579 split_type
= glsl_matrix_type(glsl_get_base_type(split_type
),
580 glsl_get_components(split_type
),
581 info
->levels
[i
].array_len
);
583 split_type
= glsl_array_type(split_type
, info
->levels
[i
].array_len
, 0);
588 info
->split_var_type
= split_type
;
589 /* To avoid list confusion (we'll be adding things as we split
590 * variables), pull all of the variables we plan to split off of the
591 * main variable list.
593 exec_node_remove(&var
->node
);
594 exec_list_push_tail(&split_vars
, &var
->node
);
596 assert(split_type
== glsl_get_bare_type(var
->type
));
597 /* If we're not modifying this variable, delete the info so we skip
598 * it faster in later passes.
600 _mesa_hash_table_remove_key(var_info_map
, var
);
604 nir_foreach_variable_in_list(var
, &split_vars
) {
605 struct array_var_info
*info
= get_array_var_info(var
, var_info_map
);
606 create_split_array_vars(info
, 0, &info
->root_split
, var
->name
,
607 shader
, impl
, mem_ctx
);
610 return !exec_list_is_empty(&split_vars
);
614 deref_has_split_wildcard(nir_deref_path
*path
,
615 struct array_var_info
*info
)
620 assert(path
->path
[0]->var
== info
->base_var
);
621 for (unsigned i
= 0; i
< info
->num_levels
; i
++) {
622 if (path
->path
[i
+ 1]->deref_type
== nir_deref_type_array_wildcard
&&
623 info
->levels
[i
].split
)
631 array_path_is_out_of_bounds(nir_deref_path
*path
,
632 struct array_var_info
*info
)
637 assert(path
->path
[0]->var
== info
->base_var
);
638 for (unsigned i
= 0; i
< info
->num_levels
; i
++) {
639 nir_deref_instr
*p
= path
->path
[i
+ 1];
640 if (p
->deref_type
== nir_deref_type_array_wildcard
)
643 if (nir_src_is_const(p
->arr
.index
) &&
644 nir_src_as_uint(p
->arr
.index
) >= info
->levels
[i
].array_len
)
652 emit_split_copies(nir_builder
*b
,
653 struct array_var_info
*dst_info
, nir_deref_path
*dst_path
,
654 unsigned dst_level
, nir_deref_instr
*dst
,
655 struct array_var_info
*src_info
, nir_deref_path
*src_path
,
656 unsigned src_level
, nir_deref_instr
*src
)
658 nir_deref_instr
*dst_p
, *src_p
;
660 while ((dst_p
= dst_path
->path
[dst_level
+ 1])) {
661 if (dst_p
->deref_type
== nir_deref_type_array_wildcard
)
664 dst
= nir_build_deref_follower(b
, dst
, dst_p
);
668 while ((src_p
= src_path
->path
[src_level
+ 1])) {
669 if (src_p
->deref_type
== nir_deref_type_array_wildcard
)
672 src
= nir_build_deref_follower(b
, src
, src_p
);
676 if (src_p
== NULL
|| dst_p
== NULL
) {
677 assert(src_p
== NULL
&& dst_p
== NULL
);
678 nir_copy_deref(b
, dst
, src
);
680 assert(dst_p
->deref_type
== nir_deref_type_array_wildcard
&&
681 src_p
->deref_type
== nir_deref_type_array_wildcard
);
683 if ((dst_info
&& dst_info
->levels
[dst_level
].split
) ||
684 (src_info
&& src_info
->levels
[src_level
].split
)) {
685 /* There are no indirects at this level on one of the source or the
686 * destination so we are lowering it.
688 assert(glsl_get_length(dst_path
->path
[dst_level
]->type
) ==
689 glsl_get_length(src_path
->path
[src_level
]->type
));
690 unsigned len
= glsl_get_length(dst_path
->path
[dst_level
]->type
);
691 for (unsigned i
= 0; i
< len
; i
++) {
692 emit_split_copies(b
, dst_info
, dst_path
, dst_level
+ 1,
693 nir_build_deref_array_imm(b
, dst
, i
),
694 src_info
, src_path
, src_level
+ 1,
695 nir_build_deref_array_imm(b
, src
, i
));
698 /* Neither side is being split so we just keep going */
699 emit_split_copies(b
, dst_info
, dst_path
, dst_level
+ 1,
700 nir_build_deref_array_wildcard(b
, dst
),
701 src_info
, src_path
, src_level
+ 1,
702 nir_build_deref_array_wildcard(b
, src
));
708 split_array_copies_impl(nir_function_impl
*impl
,
709 struct hash_table
*var_info_map
,
710 nir_variable_mode modes
,
714 nir_builder_init(&b
, impl
);
716 nir_foreach_block(block
, impl
) {
717 nir_foreach_instr_safe(instr
, block
) {
718 if (instr
->type
!= nir_instr_type_intrinsic
)
721 nir_intrinsic_instr
*copy
= nir_instr_as_intrinsic(instr
);
722 if (copy
->intrinsic
!= nir_intrinsic_copy_deref
)
725 nir_deref_instr
*dst_deref
= nir_src_as_deref(copy
->src
[0]);
726 nir_deref_instr
*src_deref
= nir_src_as_deref(copy
->src
[1]);
728 struct array_var_info
*dst_info
=
729 get_array_deref_info(dst_deref
, var_info_map
, modes
);
730 struct array_var_info
*src_info
=
731 get_array_deref_info(src_deref
, var_info_map
, modes
);
733 if (!src_info
&& !dst_info
)
736 nir_deref_path dst_path
, src_path
;
737 nir_deref_path_init(&dst_path
, dst_deref
, mem_ctx
);
738 nir_deref_path_init(&src_path
, src_deref
, mem_ctx
);
740 if (!deref_has_split_wildcard(&dst_path
, dst_info
) &&
741 !deref_has_split_wildcard(&src_path
, src_info
))
744 b
.cursor
= nir_instr_remove(©
->instr
);
746 emit_split_copies(&b
, dst_info
, &dst_path
, 0, dst_path
.path
[0],
747 src_info
, &src_path
, 0, src_path
.path
[0]);
753 split_array_access_impl(nir_function_impl
*impl
,
754 struct hash_table
*var_info_map
,
755 nir_variable_mode modes
,
759 nir_builder_init(&b
, impl
);
761 nir_foreach_block(block
, impl
) {
762 nir_foreach_instr_safe(instr
, block
) {
763 if (instr
->type
== nir_instr_type_deref
) {
764 /* Clean up any dead derefs we find lying around. They may refer
765 * to variables we're planning to split.
767 nir_deref_instr
*deref
= nir_instr_as_deref(instr
);
768 if (deref
->mode
& modes
)
769 nir_deref_instr_remove_if_unused(deref
);
773 if (instr
->type
!= nir_instr_type_intrinsic
)
776 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
777 if (intrin
->intrinsic
!= nir_intrinsic_load_deref
&&
778 intrin
->intrinsic
!= nir_intrinsic_store_deref
&&
779 intrin
->intrinsic
!= nir_intrinsic_copy_deref
)
782 const unsigned num_derefs
=
783 intrin
->intrinsic
== nir_intrinsic_copy_deref
? 2 : 1;
785 for (unsigned d
= 0; d
< num_derefs
; d
++) {
786 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[d
]);
788 struct array_var_info
*info
=
789 get_array_deref_info(deref
, var_info_map
, modes
);
794 nir_deref_path_init(&path
, deref
, mem_ctx
);
796 b
.cursor
= nir_before_instr(&intrin
->instr
);
798 if (array_path_is_out_of_bounds(&path
, info
)) {
799 /* If one of the derefs is out-of-bounds, we just delete the
800 * instruction. If a destination is out of bounds, then it may
801 * have been in-bounds prior to shrinking so we don't want to
802 * accidentally stomp something. However, we've already proven
803 * that it will never be read so it's safe to delete. If a
804 * source is out of bounds then it is loading random garbage.
805 * For loads, we replace their uses with an undef instruction
806 * and for copies we just delete the copy since it was writing
807 * undefined garbage anyway and we may as well leave the random
808 * garbage in the destination alone.
810 if (intrin
->intrinsic
== nir_intrinsic_load_deref
) {
812 nir_ssa_undef(&b
, intrin
->dest
.ssa
.num_components
,
813 intrin
->dest
.ssa
.bit_size
);
814 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
,
817 nir_instr_remove(&intrin
->instr
);
818 for (unsigned i
= 0; i
< num_derefs
; i
++)
819 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin
->src
[i
]));
823 struct array_split
*split
= &info
->root_split
;
824 for (unsigned i
= 0; i
< info
->num_levels
; i
++) {
825 if (info
->levels
[i
].split
) {
826 nir_deref_instr
*p
= path
.path
[i
+ 1];
827 unsigned index
= nir_src_as_uint(p
->arr
.index
);
828 assert(index
< info
->levels
[i
].array_len
);
829 split
= &split
->splits
[index
];
832 assert(!split
->splits
&& split
->var
);
834 nir_deref_instr
*new_deref
= nir_build_deref_var(&b
, split
->var
);
835 for (unsigned i
= 0; i
< info
->num_levels
; i
++) {
836 if (!info
->levels
[i
].split
) {
837 new_deref
= nir_build_deref_follower(&b
, new_deref
,
841 assert(new_deref
->type
== deref
->type
);
843 /* Rewrite the deref source to point to the split one */
844 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[d
],
845 nir_src_for_ssa(&new_deref
->dest
.ssa
));
846 nir_deref_instr_remove_if_unused(deref
);
852 /** A pass for splitting arrays of vectors into multiple variables
854 * This pass looks at arrays (possibly multiple levels) of vectors (not
855 * structures or other types) and tries to split them into piles of variables,
856 * one for each array element. The heuristic used is simple: If a given array
857 * level is never used with an indirect, that array level will get split.
859 * This pass probably could handles structures easily enough but making a pass
860 * that could see through an array of structures of arrays would be difficult
861 * so it's best to just run nir_split_struct_vars first.
864 nir_split_array_vars(nir_shader
*shader
, nir_variable_mode modes
)
866 void *mem_ctx
= ralloc_context(NULL
);
867 struct hash_table
*var_info_map
= _mesa_pointer_hash_table_create(mem_ctx
);
868 struct set
*complex_vars
= NULL
;
870 assert((modes
& (nir_var_shader_temp
| nir_var_function_temp
)) == modes
);
872 bool has_global_array
= false;
873 if (modes
& nir_var_shader_temp
) {
874 has_global_array
= init_var_list_array_infos(shader
,
882 bool has_any_array
= false;
883 nir_foreach_function(function
, shader
) {
887 bool has_local_array
= false;
888 if (modes
& nir_var_function_temp
) {
889 has_local_array
= init_var_list_array_infos(shader
,
890 &function
->impl
->locals
,
891 nir_var_function_temp
,
897 if (has_global_array
|| has_local_array
) {
898 has_any_array
= true;
899 mark_array_usage_impl(function
->impl
, var_info_map
, modes
, mem_ctx
);
903 /* If we failed to find any arrays of arrays, bail early. */
904 if (!has_any_array
) {
905 ralloc_free(mem_ctx
);
906 nir_shader_preserve_all_metadata(shader
);
910 bool has_global_splits
= false;
911 if (modes
& nir_var_shader_temp
) {
912 has_global_splits
= split_var_list_arrays(shader
, NULL
,
915 var_info_map
, mem_ctx
);
918 bool progress
= false;
919 nir_foreach_function(function
, shader
) {
923 bool has_local_splits
= false;
924 if (modes
& nir_var_function_temp
) {
925 has_local_splits
= split_var_list_arrays(shader
, function
->impl
,
926 &function
->impl
->locals
,
927 nir_var_function_temp
,
928 var_info_map
, mem_ctx
);
931 if (has_global_splits
|| has_local_splits
) {
932 split_array_copies_impl(function
->impl
, var_info_map
, modes
, mem_ctx
);
933 split_array_access_impl(function
->impl
, var_info_map
, modes
, mem_ctx
);
935 nir_metadata_preserve(function
->impl
, nir_metadata_block_index
|
936 nir_metadata_dominance
);
939 nir_metadata_preserve(function
->impl
, nir_metadata_all
);
943 ralloc_free(mem_ctx
);
/* Usage recorded for one array level of a variable tracked by the vector
 * shrinking code.
 */
struct array_level_usage {
   /* Number of elements at this array level */
   unsigned array_len;

   /* The value UINT_MAX will be used to indicate an indirect */
   unsigned max_read;
   unsigned max_written;

   /* True if there is a copy that isn't to/from a shrinkable array */
   bool has_external_copy;
   /* Levels of other tracked variables that are wildcard-copied to/from
    * this level; created on first use.
    */
   struct set *levels_copied;
};
960 struct vec_var_usage
{
961 /* Convenience set of all components this variable has */
962 nir_component_mask_t all_comps
;
964 nir_component_mask_t comps_read
;
965 nir_component_mask_t comps_written
;
967 nir_component_mask_t comps_kept
;
969 /* True if there is a copy that isn't to/from a shrinkable vector */
970 bool has_external_copy
;
971 bool has_complex_use
;
972 struct set
*vars_copied
;
975 struct array_level_usage levels
[0];
978 static struct vec_var_usage
*
979 get_vec_var_usage(nir_variable
*var
,
980 struct hash_table
*var_usage_map
,
981 bool add_usage_entry
, void *mem_ctx
)
983 struct hash_entry
*entry
= _mesa_hash_table_search(var_usage_map
, var
);
987 if (!add_usage_entry
)
990 /* Check to make sure that we are working with an array of vectors. We
991 * don't bother to shrink single vectors because we figure that we can
992 * clean it up better with SSA than by inserting piles of vecN instructions
993 * to compact results.
995 int num_levels
= num_array_levels_in_array_of_vector_type(var
->type
);
997 return NULL
; /* Not an array of vectors */
999 struct vec_var_usage
*usage
=
1000 rzalloc_size(mem_ctx
, sizeof(*usage
) +
1001 num_levels
* sizeof(usage
->levels
[0]));
1003 usage
->num_levels
= num_levels
;
1004 const struct glsl_type
*type
= var
->type
;
1005 for (unsigned i
= 0; i
< num_levels
; i
++) {
1006 usage
->levels
[i
].array_len
= glsl_get_length(type
);
1007 type
= glsl_get_array_element(type
);
1009 assert(glsl_type_is_vector_or_scalar(type
));
1011 usage
->all_comps
= (1 << glsl_get_components(type
)) - 1;
1013 _mesa_hash_table_insert(var_usage_map
, var
, usage
);
1018 static struct vec_var_usage
*
1019 get_vec_deref_usage(nir_deref_instr
*deref
,
1020 struct hash_table
*var_usage_map
,
1021 nir_variable_mode modes
,
1022 bool add_usage_entry
, void *mem_ctx
)
1024 if (!(deref
->mode
& modes
))
1027 return get_vec_var_usage(nir_deref_instr_get_variable(deref
),
1028 var_usage_map
, add_usage_entry
, mem_ctx
);
1032 mark_deref_if_complex(nir_deref_instr
*deref
,
1033 struct hash_table
*var_usage_map
,
1034 nir_variable_mode modes
,
1037 if (!(deref
->mode
& modes
))
1040 /* Only bother with var derefs because nir_deref_instr_has_complex_use is
1043 if (deref
->deref_type
!= nir_deref_type_var
)
1046 if (!nir_deref_instr_has_complex_use(deref
))
1049 struct vec_var_usage
*usage
=
1050 get_vec_var_usage(deref
->var
, var_usage_map
, true, mem_ctx
);
1054 usage
->has_complex_use
= true;
1058 mark_deref_used(nir_deref_instr
*deref
,
1059 nir_component_mask_t comps_read
,
1060 nir_component_mask_t comps_written
,
1061 nir_deref_instr
*copy_deref
,
1062 struct hash_table
*var_usage_map
,
1063 nir_variable_mode modes
,
1066 if (!(deref
->mode
& modes
))
1069 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
1073 struct vec_var_usage
*usage
=
1074 get_vec_var_usage(var
, var_usage_map
, true, mem_ctx
);
1078 usage
->comps_read
|= comps_read
& usage
->all_comps
;
1079 usage
->comps_written
|= comps_written
& usage
->all_comps
;
1081 struct vec_var_usage
*copy_usage
= NULL
;
1083 copy_usage
= get_vec_deref_usage(copy_deref
, var_usage_map
, modes
,
1086 if (usage
->vars_copied
== NULL
) {
1087 usage
->vars_copied
= _mesa_pointer_set_create(mem_ctx
);
1089 _mesa_set_add(usage
->vars_copied
, copy_usage
);
1091 usage
->has_external_copy
= true;
1095 nir_deref_path path
;
1096 nir_deref_path_init(&path
, deref
, mem_ctx
);
1098 nir_deref_path copy_path
;
1100 nir_deref_path_init(©_path
, copy_deref
, mem_ctx
);
1102 unsigned copy_i
= 0;
1103 for (unsigned i
= 0; i
< usage
->num_levels
; i
++) {
1104 struct array_level_usage
*level
= &usage
->levels
[i
];
1105 nir_deref_instr
*deref
= path
.path
[i
+ 1];
1106 assert(deref
->deref_type
== nir_deref_type_array
||
1107 deref
->deref_type
== nir_deref_type_array_wildcard
);
1110 if (deref
->deref_type
== nir_deref_type_array
) {
1111 max_used
= nir_src_is_const(deref
->arr
.index
) ?
1112 nir_src_as_uint(deref
->arr
.index
) : UINT_MAX
;
1114 /* For wildcards, we read or wrote the whole thing. */
1115 assert(deref
->deref_type
== nir_deref_type_array_wildcard
);
1116 max_used
= level
->array_len
- 1;
1119 /* Match each wildcard level with the level on copy_usage */
1120 for (; copy_path
.path
[copy_i
+ 1]; copy_i
++) {
1121 if (copy_path
.path
[copy_i
+ 1]->deref_type
==
1122 nir_deref_type_array_wildcard
)
1125 struct array_level_usage
*copy_level
=
1126 ©_usage
->levels
[copy_i
++];
1128 if (level
->levels_copied
== NULL
) {
1129 level
->levels_copied
= _mesa_pointer_set_create(mem_ctx
);
1131 _mesa_set_add(level
->levels_copied
, copy_level
);
1133 /* We have a wildcard and it comes from a variable we aren't
1134 * tracking; flag it and we'll know to not shorten this array.
1136 level
->has_external_copy
= true;
1141 level
->max_written
= MAX2(level
->max_written
, max_used
);
1143 level
->max_read
= MAX2(level
->max_read
, max_used
);
1148 src_is_load_deref(nir_src src
, nir_src deref_src
)
1150 nir_intrinsic_instr
*load
= nir_src_as_intrinsic(src
);
1151 if (load
== NULL
|| load
->intrinsic
!= nir_intrinsic_load_deref
)
1154 assert(load
->src
[0].is_ssa
);
1156 return load
->src
[0].ssa
== deref_src
.ssa
;
1159 /* Returns all non-self-referential components of a store instruction. A
1160 * component is self-referential if it comes from the same component of a load
1161 * instruction on the same deref. If the only data in a particular component
1162 * of a variable came directly from that component then it's undefined. The
1163 * only way to get defined data into a component of a variable is for it to
1164 * get written there by something outside or from a different component.
1166 * This is a fairly common pattern in shaders that come from either GLSL IR or
1167 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1170 static nir_component_mask_t
1171 get_non_self_referential_store_comps(nir_intrinsic_instr
*store
)
1173 nir_component_mask_t comps
= nir_intrinsic_write_mask(store
);
1175 assert(store
->src
[1].is_ssa
);
1176 nir_instr
*src_instr
= store
->src
[1].ssa
->parent_instr
;
1177 if (src_instr
->type
!= nir_instr_type_alu
)
1180 nir_alu_instr
*src_alu
= nir_instr_as_alu(src_instr
);
1182 if (src_alu
->op
== nir_op_mov
) {
1183 /* If it's just a swizzle of a load from the same deref, discount any
1184 * channels that don't move in the swizzle.
1186 if (src_is_load_deref(src_alu
->src
[0].src
, store
->src
[0])) {
1187 for (unsigned i
= 0; i
< NIR_MAX_VEC_COMPONENTS
; i
++) {
1188 if (src_alu
->src
[0].swizzle
[i
] == i
)
1189 comps
&= ~(1u << i
);
1192 } else if (nir_op_is_vec(src_alu
->op
)) {
1193 /* If it's a vec, discount any channels that are just loads from the
1194 * same deref put in the same spot.
1196 for (unsigned i
= 0; i
< nir_op_infos
[src_alu
->op
].num_inputs
; i
++) {
1197 if (src_is_load_deref(src_alu
->src
[i
].src
, store
->src
[0]) &&
1198 src_alu
->src
[i
].swizzle
[0] == i
)
1199 comps
&= ~(1u << i
);
1207 find_used_components_impl(nir_function_impl
*impl
,
1208 struct hash_table
*var_usage_map
,
1209 nir_variable_mode modes
,
1212 nir_foreach_block(block
, impl
) {
1213 nir_foreach_instr(instr
, block
) {
1214 if (instr
->type
== nir_instr_type_deref
) {
1215 mark_deref_if_complex(nir_instr_as_deref(instr
),
1216 var_usage_map
, modes
, mem_ctx
);
1219 if (instr
->type
!= nir_instr_type_intrinsic
)
1222 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
1223 switch (intrin
->intrinsic
) {
1224 case nir_intrinsic_load_deref
:
1225 mark_deref_used(nir_src_as_deref(intrin
->src
[0]),
1226 nir_ssa_def_components_read(&intrin
->dest
.ssa
), 0,
1227 NULL
, var_usage_map
, modes
, mem_ctx
);
1230 case nir_intrinsic_store_deref
:
1231 mark_deref_used(nir_src_as_deref(intrin
->src
[0]),
1232 0, get_non_self_referential_store_comps(intrin
),
1233 NULL
, var_usage_map
, modes
, mem_ctx
);
1236 case nir_intrinsic_copy_deref
: {
1237 /* Just mark everything used for copies. */
1238 nir_deref_instr
*dst
= nir_src_as_deref(intrin
->src
[0]);
1239 nir_deref_instr
*src
= nir_src_as_deref(intrin
->src
[1]);
1240 mark_deref_used(dst
, 0, ~0, src
, var_usage_map
, modes
, mem_ctx
);
1241 mark_deref_used(src
, ~0, 0, dst
, var_usage_map
, modes
, mem_ctx
);
1253 shrink_vec_var_list(struct exec_list
*vars
,
1254 nir_variable_mode mode
,
1255 struct hash_table
*var_usage_map
)
1257 /* Initialize the components kept field of each variable. This is the
1258 * AND of the components written and components read. If a component is
1259 * written but never read, it's dead. If it is read but never written,
1260 * then all values read are undefined garbage and we may as well not read
1263 * The same logic applies to the array length. We make the array length
1264 * the minimum needed required length between read and write and plan to
1265 * discard any OOB access. The one exception here is indirect writes
1266 * because we don't know where they will land and we can't shrink an array
1267 * with indirect writes because previously in-bounds writes may become
1268 * out-of-bounds and have undefined behavior.
1270 * Also, if we have a copy that to/from something we can't shrink, we need
1271 * to leave components and array_len of any wildcards alone.
1273 nir_foreach_variable_in_list(var
, vars
) {
1274 if (var
->data
.mode
!= mode
)
1277 struct vec_var_usage
*usage
=
1278 get_vec_var_usage(var
, var_usage_map
, false, NULL
);
1282 assert(usage
->comps_kept
== 0);
1283 if (usage
->has_external_copy
|| usage
->has_complex_use
)
1284 usage
->comps_kept
= usage
->all_comps
;
1286 usage
->comps_kept
= usage
->comps_read
& usage
->comps_written
;
1288 for (unsigned i
= 0; i
< usage
->num_levels
; i
++) {
1289 struct array_level_usage
*level
= &usage
->levels
[i
];
1290 assert(level
->array_len
> 0);
1292 if (level
->max_written
== UINT_MAX
|| level
->has_external_copy
||
1293 usage
->has_complex_use
)
1294 continue; /* Can't shrink */
1296 unsigned max_used
= MIN2(level
->max_read
, level
->max_written
);
1297 level
->array_len
= MIN2(max_used
, level
->array_len
- 1) + 1;
1301 /* In order for variable copies to work, we have to have the same data type
1302 * on the source and the destination. In order to satisfy this, we run a
1303 * little fixed-point algorithm to transitively ensure that we get enough
1304 * components and array elements for this to hold for all copies.
1308 fp_progress
= false;
1309 nir_foreach_variable_in_list(var
, vars
) {
1310 if (var
->data
.mode
!= mode
)
1313 struct vec_var_usage
*var_usage
=
1314 get_vec_var_usage(var
, var_usage_map
, false, NULL
);
1315 if (!var_usage
|| !var_usage
->vars_copied
)
1318 set_foreach(var_usage
->vars_copied
, copy_entry
) {
1319 struct vec_var_usage
*copy_usage
= (void *)copy_entry
->key
;
1320 if (copy_usage
->comps_kept
!= var_usage
->comps_kept
) {
1321 nir_component_mask_t comps_kept
=
1322 (var_usage
->comps_kept
| copy_usage
->comps_kept
);
1323 var_usage
->comps_kept
= comps_kept
;
1324 copy_usage
->comps_kept
= comps_kept
;
1329 for (unsigned i
= 0; i
< var_usage
->num_levels
; i
++) {
1330 struct array_level_usage
*var_level
= &var_usage
->levels
[i
];
1331 if (!var_level
->levels_copied
)
1334 set_foreach(var_level
->levels_copied
, copy_entry
) {
1335 struct array_level_usage
*copy_level
= (void *)copy_entry
->key
;
1336 if (var_level
->array_len
!= copy_level
->array_len
) {
1337 unsigned array_len
=
1338 MAX2(var_level
->array_len
, copy_level
->array_len
);
1339 var_level
->array_len
= array_len
;
1340 copy_level
->array_len
= array_len
;
1346 } while (fp_progress
);
1348 bool vars_shrunk
= false;
1349 nir_foreach_variable_in_list_safe(var
, vars
) {
1350 if (var
->data
.mode
!= mode
)
1353 struct vec_var_usage
*usage
=
1354 get_vec_var_usage(var
, var_usage_map
, false, NULL
);
1358 bool shrunk
= false;
1359 const struct glsl_type
*vec_type
= var
->type
;
1360 for (unsigned i
= 0; i
< usage
->num_levels
; i
++) {
1361 /* If we've reduced the array to zero elements at some level, just
1362 * set comps_kept to 0 and delete the variable.
1364 if (usage
->levels
[i
].array_len
== 0) {
1365 usage
->comps_kept
= 0;
1369 assert(usage
->levels
[i
].array_len
<= glsl_get_length(vec_type
));
1370 if (usage
->levels
[i
].array_len
< glsl_get_length(vec_type
))
1372 vec_type
= glsl_get_array_element(vec_type
);
1374 assert(glsl_type_is_vector_or_scalar(vec_type
));
1376 assert(usage
->comps_kept
== (usage
->comps_kept
& usage
->all_comps
));
1377 if (usage
->comps_kept
!= usage
->all_comps
)
1380 if (usage
->comps_kept
== 0) {
1381 /* This variable is dead, remove it */
1383 exec_node_remove(&var
->node
);
1388 /* This variable doesn't need to be shrunk. Remove it from the
1389 * hash table so later steps will ignore it.
1391 _mesa_hash_table_remove_key(var_usage_map
, var
);
1395 /* Build the new var type */
1396 unsigned new_num_comps
= util_bitcount(usage
->comps_kept
);
1397 const struct glsl_type
*new_type
=
1398 glsl_vector_type(glsl_get_base_type(vec_type
), new_num_comps
);
1399 for (int i
= usage
->num_levels
- 1; i
>= 0; i
--) {
1400 assert(usage
->levels
[i
].array_len
> 0);
1401 /* If the original type was a matrix type, we'd like to keep that so
1402 * we don't convert matrices into arrays.
1404 if (i
== usage
->num_levels
- 1 &&
1405 glsl_type_is_matrix(glsl_without_array(var
->type
)) &&
1406 new_num_comps
> 1 && usage
->levels
[i
].array_len
> 1) {
1407 new_type
= glsl_matrix_type(glsl_get_base_type(new_type
),
1409 usage
->levels
[i
].array_len
);
1411 new_type
= glsl_array_type(new_type
, usage
->levels
[i
].array_len
, 0);
1414 var
->type
= new_type
;
1423 vec_deref_is_oob(nir_deref_instr
*deref
,
1424 struct vec_var_usage
*usage
)
1426 nir_deref_path path
;
1427 nir_deref_path_init(&path
, deref
, NULL
);
1430 for (unsigned i
= 0; i
< usage
->num_levels
; i
++) {
1431 nir_deref_instr
*p
= path
.path
[i
+ 1];
1432 if (p
->deref_type
== nir_deref_type_array_wildcard
)
1435 if (nir_src_is_const(p
->arr
.index
) &&
1436 nir_src_as_uint(p
->arr
.index
) >= usage
->levels
[i
].array_len
) {
1442 nir_deref_path_finish(&path
);
1448 vec_deref_is_dead_or_oob(nir_deref_instr
*deref
,
1449 struct hash_table
*var_usage_map
,
1450 nir_variable_mode modes
)
1452 struct vec_var_usage
*usage
=
1453 get_vec_deref_usage(deref
, var_usage_map
, modes
, false, NULL
);
1457 return usage
->comps_kept
== 0 || vec_deref_is_oob(deref
, usage
);
1461 shrink_vec_var_access_impl(nir_function_impl
*impl
,
1462 struct hash_table
*var_usage_map
,
1463 nir_variable_mode modes
)
1466 nir_builder_init(&b
, impl
);
1468 nir_foreach_block(block
, impl
) {
1469 nir_foreach_instr_safe(instr
, block
) {
1470 switch (instr
->type
) {
1471 case nir_instr_type_deref
: {
1472 nir_deref_instr
*deref
= nir_instr_as_deref(instr
);
1473 if (!(deref
->mode
& modes
))
1476 /* Clean up any dead derefs we find lying around. They may refer
1477 * to variables we've deleted.
1479 if (nir_deref_instr_remove_if_unused(deref
))
1482 /* Update the type in the deref to keep the types consistent as
1483 * you walk down the chain. We don't need to check if this is one
1484 * of the derefs we're shrinking because this is a no-op if it
1485 * isn't. The worst that could happen is that we accidentally fix
1488 if (deref
->deref_type
== nir_deref_type_var
) {
1489 deref
->type
= deref
->var
->type
;
1490 } else if (deref
->deref_type
== nir_deref_type_array
||
1491 deref
->deref_type
== nir_deref_type_array_wildcard
) {
1492 nir_deref_instr
*parent
= nir_deref_instr_parent(deref
);
1493 assert(glsl_type_is_array(parent
->type
) ||
1494 glsl_type_is_matrix(parent
->type
));
1495 deref
->type
= glsl_get_array_element(parent
->type
);
1500 case nir_instr_type_intrinsic
: {
1501 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
1503 /* If we have a copy whose source or destination has been deleted
1504 * because we determined the variable was dead, then we just
1505 * delete the copy instruction. If the source variable was dead
1506 * then it was writing undefined garbage anyway and if it's the
1507 * destination variable that's dead then the write isn't needed.
1509 if (intrin
->intrinsic
== nir_intrinsic_copy_deref
) {
1510 nir_deref_instr
*dst
= nir_src_as_deref(intrin
->src
[0]);
1511 nir_deref_instr
*src
= nir_src_as_deref(intrin
->src
[1]);
1512 if (vec_deref_is_dead_or_oob(dst
, var_usage_map
, modes
) ||
1513 vec_deref_is_dead_or_oob(src
, var_usage_map
, modes
)) {
1514 nir_instr_remove(&intrin
->instr
);
1515 nir_deref_instr_remove_if_unused(dst
);
1516 nir_deref_instr_remove_if_unused(src
);
1521 if (intrin
->intrinsic
!= nir_intrinsic_load_deref
&&
1522 intrin
->intrinsic
!= nir_intrinsic_store_deref
)
1525 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
1526 if (!(deref
->mode
& modes
))
1529 struct vec_var_usage
*usage
=
1530 get_vec_deref_usage(deref
, var_usage_map
, modes
, false, NULL
);
1534 if (usage
->comps_kept
== 0 || vec_deref_is_oob(deref
, usage
)) {
1535 if (intrin
->intrinsic
== nir_intrinsic_load_deref
) {
1537 nir_ssa_undef(&b
, intrin
->dest
.ssa
.num_components
,
1538 intrin
->dest
.ssa
.bit_size
);
1539 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
,
1540 nir_src_for_ssa(u
));
1542 nir_instr_remove(&intrin
->instr
);
1543 nir_deref_instr_remove_if_unused(deref
);
1547 /* If we're not dropping any components, there's no need to
1550 if (usage
->comps_kept
== usage
->all_comps
)
1553 if (intrin
->intrinsic
== nir_intrinsic_load_deref
) {
1554 b
.cursor
= nir_after_instr(&intrin
->instr
);
1556 nir_ssa_def
*undef
=
1557 nir_ssa_undef(&b
, 1, intrin
->dest
.ssa
.bit_size
);
1558 nir_ssa_def
*vec_srcs
[NIR_MAX_VEC_COMPONENTS
];
1560 for (unsigned i
= 0; i
< intrin
->num_components
; i
++) {
1561 if (usage
->comps_kept
& (1u << i
))
1562 vec_srcs
[i
] = nir_channel(&b
, &intrin
->dest
.ssa
, c
++);
1564 vec_srcs
[i
] = undef
;
1566 nir_ssa_def
*vec
= nir_vec(&b
, vec_srcs
, intrin
->num_components
);
1568 nir_ssa_def_rewrite_uses_after(&intrin
->dest
.ssa
,
1569 nir_src_for_ssa(vec
),
1572 /* The SSA def is now only used by the swizzle. It's safe to
1573 * shrink the number of components.
1575 assert(list_length(&intrin
->dest
.ssa
.uses
) == c
);
1576 intrin
->num_components
= c
;
1577 intrin
->dest
.ssa
.num_components
= c
;
1579 nir_component_mask_t write_mask
=
1580 nir_intrinsic_write_mask(intrin
);
1582 unsigned swizzle
[NIR_MAX_VEC_COMPONENTS
];
1583 nir_component_mask_t new_write_mask
= 0;
1585 for (unsigned i
= 0; i
< intrin
->num_components
; i
++) {
1586 if (usage
->comps_kept
& (1u << i
)) {
1588 if (write_mask
& (1u << i
))
1589 new_write_mask
|= 1u << c
;
1594 b
.cursor
= nir_before_instr(&intrin
->instr
);
1596 nir_ssa_def
*swizzled
=
1597 nir_swizzle(&b
, intrin
->src
[1].ssa
, swizzle
, c
);
1599 /* Rewrite to use the compacted source */
1600 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[1],
1601 nir_src_for_ssa(swizzled
));
1602 nir_intrinsic_set_write_mask(intrin
, new_write_mask
);
1603 intrin
->num_components
= c
;
1616 function_impl_has_vars_with_modes(nir_function_impl
*impl
,
1617 nir_variable_mode modes
)
1619 nir_shader
*shader
= impl
->function
->shader
;
1621 if (modes
& ~nir_var_function_temp
) {
1622 nir_foreach_variable_with_modes(var
, shader
,
1623 modes
& ~nir_var_function_temp
)
1627 if ((modes
& nir_var_function_temp
) && !exec_list_is_empty(&impl
->locals
))
1633 /** Attempt to shrink arrays of vectors
1635 * This pass looks at variables which contain a vector or an array (possibly
1636 * multiple dimensions) of vectors and attempts to lower to a smaller vector
1637 * or array. If the pass can prove that a component of a vector (or array of
1638 * vectors) is never really used, then that component will be removed.
1639 * Similarly, the pass attempts to shorten arrays based on what elements it
1640 * can prove are never read or never contain valid data.
1643 nir_shrink_vec_array_vars(nir_shader
*shader
, nir_variable_mode modes
)
1645 assert((modes
& (nir_var_shader_temp
| nir_var_function_temp
)) == modes
);
1647 void *mem_ctx
= ralloc_context(NULL
);
1649 struct hash_table
*var_usage_map
=
1650 _mesa_pointer_hash_table_create(mem_ctx
);
1652 bool has_vars_to_shrink
= false;
1653 nir_foreach_function(function
, shader
) {
1654 if (!function
->impl
)
1657 /* Don't even bother crawling the IR if we don't have any variables.
1658 * Given that this pass deletes any unused variables, it's likely that
1659 * we will be in this scenario eventually.
1661 if (function_impl_has_vars_with_modes(function
->impl
, modes
)) {
1662 has_vars_to_shrink
= true;
1663 find_used_components_impl(function
->impl
, var_usage_map
,
1667 if (!has_vars_to_shrink
) {
1668 ralloc_free(mem_ctx
);
1669 nir_shader_preserve_all_metadata(shader
);
1673 bool globals_shrunk
= false;
1674 if (modes
& nir_var_shader_temp
) {
1675 globals_shrunk
= shrink_vec_var_list(&shader
->variables
,
1676 nir_var_shader_temp
,
1680 bool progress
= false;
1681 nir_foreach_function(function
, shader
) {
1682 if (!function
->impl
)
1685 bool locals_shrunk
= false;
1686 if (modes
& nir_var_function_temp
) {
1687 locals_shrunk
= shrink_vec_var_list(&function
->impl
->locals
,
1688 nir_var_function_temp
,
1692 if (globals_shrunk
|| locals_shrunk
) {
1693 shrink_vec_var_access_impl(function
->impl
, var_usage_map
, modes
);
1695 nir_metadata_preserve(function
->impl
, nir_metadata_block_index
|
1696 nir_metadata_dominance
);
1699 nir_metadata_preserve(function
->impl
, nir_metadata_all
);
1703 ralloc_free(mem_ctx
);