/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
struct match_node {
   /* Note: these fields are only valid for leaf nodes */

   unsigned next_array_idx;
   int src_wildcard_idx;
   nir_deref_path first_src_path;

   /* The index of the first read of the source path that's part of the copy
    * we're matching. If the last write to the source path is after this, we
    * would get a different result from reading it at the end and we can't
    * emit the copy.
    */
   unsigned first_src_read;

   /* The last time there was a write to this node. */
   unsigned last_overwritten;

   /* The last time there was a write to this node which successfully advanced
    * next_array_idx. This helps us catch any intervening aliased writes.
    */
   unsigned last_successful_write;

   unsigned num_children;
   struct match_node *children[];
};
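/* Layout note (added for clarity): create_match_node() below allocates
 * glsl_get_length(type) + 1 children for an array or matrix type, with the
 * extra final slot standing in for wildcard ([*]) accesses; e.g. a float[4]
 * node has children[0..3] for the constant indices and children[4] for [*].
 * Struct types get exactly one child per member.
 */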
struct match_state {
   /* Map from nir_variable * -> match_node */
   struct hash_table *var_nodes;
   /* Map from cast nir_deref_instr * -> match_node */
   struct hash_table *cast_nodes;

   unsigned cur_instr;

   nir_builder builder;

   void *dead_ctx;
};
static struct match_node *
create_match_node(const struct glsl_type *type, struct match_state *state)
{
   unsigned num_children = 0;
   if (glsl_type_is_array_or_matrix(type)) {
      /* One for wildcards */
      num_children = glsl_get_length(type) + 1;
   } else if (glsl_type_is_struct_or_ifc(type)) {
      num_children = glsl_get_length(type);
   }

   struct match_node *node = rzalloc_size(state->dead_ctx,
                                          sizeof(struct match_node) +
                                          num_children * sizeof(struct match_node *));
   node->num_children = num_children;
   node->src_wildcard_idx = -1;
   node->first_src_read = UINT32_MAX;
   return node;
}
static struct match_node *
node_for_deref(nir_deref_instr *instr, struct match_node *parent,
               struct match_state *state)
{
   unsigned idx;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      struct hash_entry *entry =
         _mesa_hash_table_search(state->var_nodes, instr->var);
      if (entry)
         return entry->data;

      struct match_node *node = create_match_node(instr->type, state);
      _mesa_hash_table_insert(state->var_nodes, instr->var, node);
      return node;
   }

   case nir_deref_type_cast: {
      struct hash_entry *entry =
         _mesa_hash_table_search(state->cast_nodes, instr);
      if (entry)
         return entry->data;

      struct match_node *node = create_match_node(instr->type, state);
      _mesa_hash_table_insert(state->cast_nodes, instr, node);
      return node;
   }

   case nir_deref_type_array_wildcard:
      idx = parent->num_children - 1;
      break;

   case nir_deref_type_array:
      if (nir_src_is_const(instr->arr.index)) {
         idx = nir_src_as_uint(instr->arr.index);
         assert(idx < parent->num_children - 1);
      } else {
         idx = parent->num_children - 1;
      }
      break;

   case nir_deref_type_struct:
      idx = instr->strct.index;
      break;

   default:
      unreachable("bad deref type");
   }

   assert(idx < parent->num_children);
   if (parent->children[idx]) {
      return parent->children[idx];
   }

   struct match_node *node = create_match_node(instr->type, state);
   parent->children[idx] = node;
   return node;
}
static struct match_node *
node_for_wildcard(const struct glsl_type *type, struct match_node *parent,
                  struct match_state *state)
{
   assert(glsl_type_is_array_or_matrix(type));
   unsigned idx = glsl_get_length(type);

   if (parent->children[idx]) {
      return parent->children[idx];
   }

   struct match_node *node =
      create_match_node(glsl_get_array_element(type), state);
   parent->children[idx] = node;
   return node;
}
static struct match_node *
node_for_path(nir_deref_path *path, struct match_state *state)
{
   struct match_node *node = NULL;
   for (nir_deref_instr **instr = path->path; *instr; instr++)
      node = node_for_deref(*instr, node, state);

   return node;
}
static struct match_node *
node_for_path_with_wildcard(nir_deref_path *path, unsigned wildcard_idx,
                            struct match_state *state)
{
   struct match_node *node = NULL;
   unsigned idx = 0;
   for (nir_deref_instr **instr = path->path; *instr; instr++, idx++) {
      if (idx == wildcard_idx)
         node = node_for_wildcard((*(instr - 1))->type, node, state);
      else
         node = node_for_deref(*instr, node, state);
   }

   return node;
}
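/* Illustrative example (added, not from the original source): for the path
 * a[3].f with wildcard_idx == 1, this returns the node for a[*].f, i.e. the
 * entry used to track an in-progress array copy across all indices of a.
 */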
typedef void (*match_cb)(struct match_node *, struct match_state *);
static void
_foreach_aliasing(nir_deref_instr **deref, match_cb cb,
                  struct match_node *node, struct match_state *state)
{
   if (*deref == NULL) {
      cb(node, state);
      return;
   }

   switch ((*deref)->deref_type) {
   case nir_deref_type_struct: {
      struct match_node *child = node->children[(*deref)->strct.index];
      if (child)
         _foreach_aliasing(deref + 1, cb, child, state);
      return;
   }

   case nir_deref_type_array:
   case nir_deref_type_array_wildcard: {
      if ((*deref)->deref_type == nir_deref_type_array_wildcard ||
          !nir_src_is_const((*deref)->arr.index)) {
         /* This access may touch any index, so we have to visit all of
          * them.
          */
         for (unsigned i = 0; i < node->num_children; i++) {
            if (node->children[i])
               _foreach_aliasing(deref + 1, cb, node->children[i], state);
         }
      } else {
         /* Visit the wildcard entry if any */
         if (node->children[node->num_children - 1]) {
            _foreach_aliasing(deref + 1, cb,
                              node->children[node->num_children - 1], state);
         }

         unsigned index = nir_src_as_uint((*deref)->arr.index);
         /* Check that the index is in-bounds */
         if (index < node->num_children - 1 && node->children[index])
            _foreach_aliasing(deref + 1, cb, node->children[index], state);
      }
      return;
   }

   default:
      unreachable("bad deref type");
   }
}
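/* Illustrative note (added): for a constant-index access such as dst[2],
 * the walk above visits both the child for index 2 and the trailing
 * wildcard child (dst[*]), since a wildcard entry aliases every index;
 * an indirect or wildcard access visits all children instead.
 */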
static void
_foreach_child(match_cb cb, struct match_node *node, struct match_state *state)
{
   if (node->num_children == 0) {
      cb(node, state);
   } else {
      for (unsigned i = 0; i < node->num_children; i++) {
         if (node->children[i])
            _foreach_child(cb, node->children[i], state);
      }
   }
}
/* Given a deref path, find all the leaf deref nodes that alias it. */

static void
foreach_aliasing_node(nir_deref_path *path,
                      match_cb cb,
                      struct match_state *state)
{
   if (path->path[0]->deref_type == nir_deref_type_var) {
      struct hash_entry *entry = _mesa_hash_table_search(state->var_nodes,
                                                         path->path[0]->var);
      if (entry)
         _foreach_aliasing(&path->path[1], cb, entry->data, state);

      hash_table_foreach(state->cast_nodes, entry)
         _foreach_child(cb, entry->data, state);
   } else {
      /* Casts automatically alias anything that isn't a cast */
      assert(path->path[0]->deref_type == nir_deref_type_cast);
      hash_table_foreach(state->var_nodes, entry)
         _foreach_child(cb, entry->data, state);

      /* Casts alias other casts if the casts are different or if they're the
       * same and the path from the cast may alias as per the usual rules.
       */
      hash_table_foreach(state->cast_nodes, entry) {
         const nir_deref_instr *cast = entry->key;
         assert(cast->deref_type == nir_deref_type_cast);
         if (cast == path->path[0])
            _foreach_aliasing(&path->path[1], cb, entry->data, state);
         else
            _foreach_child(cb, entry->data, state);
      }
   }
}
static nir_deref_instr *
build_wildcard_deref(nir_builder *b, nir_deref_path *path,
                     unsigned wildcard_idx)
{
   assert(path->path[wildcard_idx]->deref_type == nir_deref_type_array);

   nir_deref_instr *tail =
      nir_build_deref_array_wildcard(b, path->path[wildcard_idx - 1]);

   for (unsigned i = wildcard_idx + 1; path->path[i]; i++)
      tail = nir_build_deref_follower(b, tail, path->path[i]);

   return tail;
}
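/* Illustrative example (added, not from the original source): for the path
 * dst[3].f[2] with wildcard_idx pointing at the [3] entry, this emits derefs
 * for dst[*].f[2], i.e. the matched constant index is replaced by a wildcard
 * and the rest of the chain is rebuilt below it.
 */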
static void
clobber(struct match_node *node, struct match_state *state)
{
   node->last_overwritten = state->cur_instr;
}
static bool
try_match_deref(nir_deref_path *base_path, int *path_array_idx,
                nir_deref_path *deref_path, int arr_idx,
                nir_deref_instr *dst)
{
   for (int i = 0; ; i++) {
      nir_deref_instr *b = base_path->path[i];
      nir_deref_instr *d = deref_path->path[i];

      /* They have to be the same length */
      if ((b == NULL) != (d == NULL))
         return false;

      if (b == NULL)
         break;

      /* This can happen if one is a deref_array and the other a wildcard */
      if (b->deref_type != d->deref_type)
         return false;

      switch (b->deref_type) {
      case nir_deref_type_var:
         if (b->var != d->var)
            return false;
         continue;

      case nir_deref_type_array:
         assert(b->arr.index.is_ssa && d->arr.index.is_ssa);
         const bool const_b_idx = nir_src_is_const(b->arr.index);
         const bool const_d_idx = nir_src_is_const(d->arr.index);
         const unsigned b_idx = const_b_idx ? nir_src_as_uint(b->arr.index) : 0;
         const unsigned d_idx = const_d_idx ? nir_src_as_uint(d->arr.index) : 0;

         /* If we don't have an index into the path yet or if this entry in
          * the path is at the array index, see if this is a candidate.  We're
          * looking for an index which is zero in the base deref and arr_idx
          * in the search deref and has a matching array size.
          */
         if ((*path_array_idx < 0 || *path_array_idx == i) &&
             const_b_idx && b_idx == 0 &&
             const_d_idx && d_idx == arr_idx &&
             glsl_get_length(nir_deref_instr_parent(b)->type) ==
             glsl_get_length(nir_deref_instr_parent(dst)->type)) {
            *path_array_idx = i;
            continue;
         }

         /* We're at the array index but not a candidate */
         if (*path_array_idx == i)
            return false;

         /* If we're not the path array index, we must match exactly.  We
          * could probably just compare SSA values and trust in copy
          * propagation but doing it ourselves means this pass can run a bit
          * earlier.
          */
         if (b->arr.index.ssa == d->arr.index.ssa ||
             (const_b_idx && const_d_idx && b_idx == d_idx))
            continue;

         return false;

      case nir_deref_type_array_wildcard:
         continue;

      case nir_deref_type_struct:
         if (b->strct.index != d->strct.index)
            return false;
         continue;

      default:
         unreachable("Invalid deref type in a path");
      }
   }

   /* If we got here without failing, we've matched.  However, it isn't an
    * array match unless we found an altered array index.
    */
   return *path_array_idx > 0;
}
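/* Illustrative example (added, not from the original source): matching the
 * recorded base path src[0].f against the new source deref src[2].f with
 * arr_idx == 2 succeeds: the constant 0/2 pair becomes the candidate array
 * index, and *path_array_idx records its position in the path.
 */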
static void
handle_read(nir_deref_instr *src, struct match_state *state)
{
   /* We only need to create an entry for sources that might be used to form
    * an array copy.  Hence no indirects or indexing into a vector.
    */
   if (nir_deref_instr_has_indirect(src) ||
       nir_deref_instr_is_known_out_of_bounds(src) ||
       (src->deref_type == nir_deref_type_array &&
        glsl_type_is_vector(nir_src_as_deref(src->parent)->type)))
      return;

   nir_deref_path src_path;
   nir_deref_path_init(&src_path, src, state->dead_ctx);

   /* Create a node for this source if it doesn't exist. The point of this is
    * to know which nodes aliasing a given store we actually need to care
    * about, to avoid creating an excessive amount of nodes.
    */
   node_for_path(&src_path, state);
}
/* The core implementation, which is used for both copies and writes. Return
 * true if a copy is created.
 */
static bool
handle_write(nir_deref_instr *dst, nir_deref_instr *src,
             unsigned write_index, unsigned read_index,
             struct match_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_path dst_path;
   nir_deref_path_init(&dst_path, dst, state->dead_ctx);

   unsigned idx = 0;
   for (nir_deref_instr **instr = dst_path.path; *instr; instr++, idx++) {
      if ((*instr)->deref_type != nir_deref_type_array)
         continue;

      /* Get the entry where the index is replaced by a wildcard, so that we
       * hopefully can keep matching an array copy.
       */
      struct match_node *dst_node =
         node_for_path_with_wildcard(&dst_path, idx, state);

      if (src == NULL)
         goto reset;

      if (nir_src_as_uint((*instr)->arr.index) != dst_node->next_array_idx)
         goto reset;

      if (dst_node->next_array_idx == 0) {
         /* At this point there may be multiple source indices which are zero,
          * so we can't pin down the actual source index. Just store it and
          * move on.
          */
         nir_deref_path_init(&dst_node->first_src_path, src, state->dead_ctx);
      } else {
         nir_deref_path src_path;
         nir_deref_path_init(&src_path, src, state->dead_ctx);
         bool result = try_match_deref(&dst_node->first_src_path,
                                       &dst_node->src_wildcard_idx,
                                       &src_path, dst_node->next_array_idx,
                                       *instr);
         nir_deref_path_finish(&src_path);
         if (!result)
            goto reset;
      }

      /* Check if an aliasing write clobbered the array after the last normal
       * write. For example, with a sequence like this:
       *
       *    dst[0][*] = src[0][*];
       *    dst[0][0] = 0; // invalidates the array copy dst[*][*] = src[*][*]
       *    dst[1][*] = src[1][*];
       *
       * Note that the second write wouldn't reset the entry for dst[*][*]
       * by itself, but it'll be caught by this check when processing the
       * third copy.
       */
      if (dst_node->last_successful_write < dst_node->last_overwritten)
         goto reset;

      dst_node->last_successful_write = write_index;

      /* In this case we've successfully processed an array element. Check if
       * this is the last, so that we can emit an array copy.
       */
      dst_node->next_array_idx++;
      dst_node->first_src_read = MIN2(dst_node->first_src_read, read_index);
      if (dst_node->next_array_idx > 1 &&
          dst_node->next_array_idx == glsl_get_length((*(instr - 1))->type)) {
         /* Make sure that nothing was overwritten. */
         struct match_node *src_node =
            node_for_path_with_wildcard(&dst_node->first_src_path,
                                        dst_node->src_wildcard_idx,
                                        state);

         if (src_node->last_overwritten <= dst_node->first_src_read) {
            nir_copy_deref(b, build_wildcard_deref(b, &dst_path, idx),
                           build_wildcard_deref(b, &dst_node->first_src_path,
                                                dst_node->src_wildcard_idx));
            foreach_aliasing_node(&dst_path, clobber, state);
            return true;
         }
      } else {
         continue;
      }

   reset:
      dst_node->next_array_idx = 0;
      dst_node->src_wildcard_idx = -1;
      dst_node->last_successful_write = 0;
      dst_node->first_src_read = UINT32_MAX;
   }

   /* Mark everything aliasing dst_path as clobbered. This needs to happen
    * last since in the loop above we need to know what last clobbered
    * dst_node and this overwrites that.
    */
   foreach_aliasing_node(&dst_path, clobber, state);

   return false;
}
static bool
opt_find_array_copies_block(nir_builder *b, nir_block *block,
                            struct match_state *state)
{
   bool progress = false;

   unsigned next_index = 0;

   _mesa_hash_table_clear(state->var_nodes, NULL);
   _mesa_hash_table_clear(state->cast_nodes, NULL);

   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      /* Index the instructions before we do anything else. */
      instr->index = next_index++;

      /* Save the index of this instruction */
      state->cur_instr = instr->index;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_load_deref) {
         handle_read(nir_src_as_deref(intrin->src[0]), state);
         continue;
      }

      if (intrin->intrinsic != nir_intrinsic_copy_deref &&
          intrin->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *dst_deref = nir_src_as_deref(intrin->src[0]);

      /* The destination must be local.  If we see a non-local store, we
       * continue on because it won't affect local stores or read-only
       * things.
       */
      if (dst_deref->mode != nir_var_function_temp)
         continue;

      /* If there are any known out-of-bounds writes, then we can just skip
       * this write as it's undefined and won't contribute to building up an
       * array copy anyways.
       */
      if (nir_deref_instr_is_known_out_of_bounds(dst_deref))
         continue;

      nir_deref_instr *src_deref;
      unsigned load_index = 0;
      if (intrin->intrinsic == nir_intrinsic_copy_deref) {
         src_deref = nir_src_as_deref(intrin->src[1]);
         load_index = intrin->instr.index;
      } else {
         assert(intrin->intrinsic == nir_intrinsic_store_deref);
         nir_intrinsic_instr *load = nir_src_as_intrinsic(intrin->src[1]);
         if (load == NULL || load->intrinsic != nir_intrinsic_load_deref) {
            src_deref = NULL;
         } else {
            src_deref = nir_src_as_deref(load->src[0]);
            load_index = load->instr.index;
         }

         if (nir_intrinsic_write_mask(intrin) !=
             (1 << glsl_get_components(dst_deref->type)) - 1) {
            src_deref = NULL;
         }
      }

      /* The source must be either local or something that's guaranteed to be
       * read-only.
       */
      const nir_variable_mode read_only_modes =
         nir_var_shader_in | nir_var_uniform | nir_var_system_value;
      if (src_deref &&
          !(src_deref->mode & (nir_var_function_temp | read_only_modes))) {
         src_deref = NULL;
      }

      /* There must be no indirects in the source or destination and no known
       * out-of-bounds accesses in the source, and the copy must be fully
       * qualified, or else we can't build up the array copy.  We handled
       * out-of-bounds accesses to the dest above.  The types must match, since
       * copy_deref currently can't bitcast mismatched deref types.
       */
      if (src_deref &&
          (nir_deref_instr_has_indirect(src_deref) ||
           nir_deref_instr_is_known_out_of_bounds(src_deref) ||
           nir_deref_instr_has_indirect(dst_deref) ||
           !glsl_type_is_vector_or_scalar(src_deref->type) ||
           glsl_get_bare_type(src_deref->type) !=
           glsl_get_bare_type(dst_deref->type))) {
         src_deref = NULL;
      }

      state->builder.cursor = nir_after_instr(instr);
      progress |= handle_write(dst_deref, src_deref, instr->index,
                               load_index, state);
   }

   return progress;
}
static bool
opt_find_array_copies_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool progress = false;

   struct match_state s;
   s.dead_ctx = ralloc_context(NULL);
   s.var_nodes = _mesa_pointer_hash_table_create(s.dead_ctx);
   s.cast_nodes = _mesa_pointer_hash_table_create(s.dead_ctx);
   nir_builder_init(&s.builder, impl);

   nir_foreach_block(block, impl) {
      if (opt_find_array_copies_block(&b, block, &s))
         progress = true;
   }

   ralloc_free(s.dead_ctx);

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}
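/* Illustrative example (added, not from the original source): given a
 * three-element array, the pass rewrites
 *
 *    dst[0] = src[0];
 *    dst[1] = src[1];
 *    dst[2] = src[2];
 *
 * into a single wildcard copy once the final element is written:
 *
 *    dst[*] = src[*];
 */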
/**
 * This peephole optimization looks for a series of load/store_deref or
 * copy_deref instructions that copy an array from one variable to another and
 * turns it into a copy_deref that copies the entire array.  The pattern it
 * looks for is extremely specific but it's good enough to pick up on the
 * input array copies in DXVK and should also be able to pick up the sequence
 * generated by spirv_to_nir for a OpLoad of a large composite followed by
 * OpStore.
 *
 * TODO: Support out-of-order copies.
 */
bool
nir_opt_find_array_copies(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl && opt_find_array_copies_impl(function->impl))
         progress = true;
   }

   return progress;
}
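/* Usage sketch (an assumption about typical driver code, not part of this
 * file): the pass is normally invoked from a shader optimization loop via
 * the NIR_PASS macro, alongside other deref-based passes such as
 * nir_opt_copy_prop_vars:
 *
 *    bool progress;
 *    NIR_PASS(progress, shader, nir_opt_find_array_copies);
 */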