2 * Copyright © 2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "nir_builder.h"
26 #include "nir_deref.h"
29 /* Note: these fields are only valid for leaf nodes */
31 unsigned next_array_idx
;
33 nir_deref_path first_src_path
;
35 /* The index of the first read of the source path that's part of the copy
36 * we're matching. If the last write to the source path is after this, we
37 * would get a different result from reading it at the end and we can't
40 unsigned first_src_read
;
42 /* The last time there was a write to this node. */
43 unsigned last_overwritten
;
45 /* The last time there was a write to this node which successfully advanced
46 * next_array_idx. This helps us catch any intervening aliased writes.
48 unsigned last_successful_write
;
50 unsigned num_children
;
51 struct match_node
*children
[];
55 /* Map from nir_variable * -> match_node */
56 struct hash_table
*table
;
65 static struct match_node
*
66 create_match_node(const struct glsl_type
*type
, struct match_state
*state
)
68 unsigned num_children
= 0;
69 if (glsl_type_is_array_or_matrix(type
)) {
70 /* One for wildcards */
71 num_children
= glsl_get_length(type
) + 1;
72 } else if (glsl_type_is_struct_or_ifc(type
)) {
73 num_children
= glsl_get_length(type
);
76 struct match_node
*node
= rzalloc_size(state
->dead_ctx
,
77 sizeof(struct match_node
) +
78 num_children
* sizeof(struct match_node
*));
79 node
->num_children
= num_children
;
80 node
->src_wildcard_idx
= -1;
81 node
->first_src_read
= UINT32_MAX
;
85 static struct match_node
*
86 node_for_deref(nir_deref_instr
*instr
, struct match_node
*parent
,
87 struct match_state
*state
)
90 switch (instr
->deref_type
) {
91 case nir_deref_type_var
: {
92 struct hash_entry
*entry
= _mesa_hash_table_search(state
->table
, instr
->var
);
96 struct match_node
*node
= create_match_node(instr
->type
, state
);
97 _mesa_hash_table_insert(state
->table
, instr
->var
, node
);
102 case nir_deref_type_array_wildcard
:
103 idx
= parent
->num_children
- 1;
106 case nir_deref_type_array
:
107 if (nir_src_is_const(instr
->arr
.index
)) {
108 idx
= nir_src_as_uint(instr
->arr
.index
);
109 assert(idx
< parent
->num_children
- 1);
111 idx
= parent
->num_children
- 1;
115 case nir_deref_type_struct
:
116 idx
= instr
->strct
.index
;
120 unreachable("bad deref type");
123 assert(idx
< parent
->num_children
);
124 if (parent
->children
[idx
]) {
125 return parent
->children
[idx
];
127 struct match_node
*node
= create_match_node(instr
->type
, state
);
128 parent
->children
[idx
] = node
;
133 static struct match_node
*
134 node_for_wildcard(const struct glsl_type
*type
, struct match_node
*parent
,
135 struct match_state
*state
)
137 assert(glsl_type_is_array_or_matrix(type
));
138 unsigned idx
= glsl_get_length(type
);
140 if (parent
->children
[idx
]) {
141 return parent
->children
[idx
];
143 struct match_node
*node
=
144 create_match_node(glsl_get_array_element(type
), state
);
145 parent
->children
[idx
] = node
;
150 static struct match_node
*
151 node_for_path(nir_deref_path
*path
, struct match_state
*state
)
153 struct match_node
*node
= NULL
;
154 for (nir_deref_instr
**instr
= path
->path
; *instr
; instr
++)
155 node
= node_for_deref(*instr
, node
, state
);
160 static struct match_node
*
161 node_for_path_with_wildcard(nir_deref_path
*path
, unsigned wildcard_idx
,
162 struct match_state
*state
)
164 struct match_node
*node
= NULL
;
166 for (nir_deref_instr
**instr
= path
->path
; *instr
; instr
++, idx
++) {
167 if (idx
== wildcard_idx
)
168 node
= node_for_wildcard((*(instr
- 1))->type
, node
, state
);
170 node
= node_for_deref(*instr
, node
, state
);
/* Callback invoked on every leaf match_node that aliases a deref path. */
typedef void (*match_cb)(struct match_node *, struct match_state *);
179 _foreach_aliasing(nir_deref_instr
**deref
, match_cb cb
,
180 struct match_node
*node
, struct match_state
*state
)
182 if (*deref
== NULL
) {
187 switch ((*deref
)->deref_type
) {
188 case nir_deref_type_struct
: {
189 struct match_node
*child
= node
->children
[(*deref
)->strct
.index
];
191 _foreach_aliasing(deref
+ 1, cb
, child
, state
);
195 case nir_deref_type_array
:
196 case nir_deref_type_array_wildcard
: {
197 if ((*deref
)->deref_type
== nir_deref_type_array_wildcard
||
198 !nir_src_is_const((*deref
)->arr
.index
)) {
199 /* This access may touch any index, so we have to visit all of
202 for (unsigned i
= 0; i
< node
->num_children
; i
++) {
203 if (node
->children
[i
])
204 _foreach_aliasing(deref
+ 1, cb
, node
->children
[i
], state
);
207 /* Visit the wildcard entry if any */
208 if (node
->children
[node
->num_children
- 1]) {
209 _foreach_aliasing(deref
+ 1, cb
,
210 node
->children
[node
->num_children
- 1], state
);
213 unsigned index
= nir_src_as_uint((*deref
)->arr
.index
);
214 /* Check that the index is in-bounds */
215 if (index
< node
->num_children
- 1 && node
->children
[index
])
216 _foreach_aliasing(deref
+ 1, cb
, node
->children
[index
], state
);
222 unreachable("bad deref type");
226 /* Given a deref path, find all the leaf deref nodes that alias it. */
229 foreach_aliasing_node(nir_deref_path
*path
,
231 struct match_state
*state
)
233 assert(path
->path
[0]->deref_type
== nir_deref_type_var
);
234 struct hash_entry
*entry
= _mesa_hash_table_search(state
->table
,
237 _foreach_aliasing(&path
->path
[1], cb
, entry
->data
, state
);
240 static nir_deref_instr
*
241 build_wildcard_deref(nir_builder
*b
, nir_deref_path
*path
,
242 unsigned wildcard_idx
)
244 assert(path
->path
[wildcard_idx
]->deref_type
== nir_deref_type_array
);
246 nir_deref_instr
*tail
=
247 nir_build_deref_array_wildcard(b
, path
->path
[wildcard_idx
- 1]);
249 for (unsigned i
= wildcard_idx
+ 1; path
->path
[i
]; i
++)
250 tail
= nir_build_deref_follower(b
, tail
, path
->path
[i
]);
256 clobber(struct match_node
*node
, struct match_state
*state
)
258 node
->last_overwritten
= state
->cur_instr
;
262 try_match_deref(nir_deref_path
*base_path
, int *path_array_idx
,
263 nir_deref_path
*deref_path
, int arr_idx
)
265 for (int i
= 0; ; i
++) {
266 nir_deref_instr
*b
= base_path
->path
[i
];
267 nir_deref_instr
*d
= deref_path
->path
[i
];
268 /* They have to be the same length */
269 if ((b
== NULL
) != (d
== NULL
))
275 /* This can happen if one is a deref_array and the other a wildcard */
276 if (b
->deref_type
!= d
->deref_type
)
279 switch (b
->deref_type
) {
280 case nir_deref_type_var
:
281 if (b
->var
!= d
->var
)
285 case nir_deref_type_array
:
286 assert(b
->arr
.index
.is_ssa
&& d
->arr
.index
.is_ssa
);
287 const bool const_b_idx
= nir_src_is_const(b
->arr
.index
);
288 const bool const_d_idx
= nir_src_is_const(d
->arr
.index
);
289 const unsigned b_idx
= const_b_idx
? nir_src_as_uint(b
->arr
.index
) : 0;
290 const unsigned d_idx
= const_d_idx
? nir_src_as_uint(d
->arr
.index
) : 0;
292 /* If we don't have an index into the path yet or if this entry in
293 * the path is at the array index, see if this is a candidate. We're
294 * looking for an index which is zero in the base deref and arr_idx
295 * in the search deref.
297 if ((*path_array_idx
< 0 || *path_array_idx
== i
) &&
298 const_b_idx
&& b_idx
== 0 &&
299 const_d_idx
&& d_idx
== arr_idx
) {
304 /* We're at the array index but not a candidate */
305 if (*path_array_idx
== i
)
308 /* If we're not the path array index, we must match exactly. We
309 * could probably just compare SSA values and trust in copy
310 * propagation but doing it ourselves means this pass can run a bit
313 if (b
->arr
.index
.ssa
== d
->arr
.index
.ssa
||
314 (const_b_idx
&& const_d_idx
&& b_idx
== d_idx
))
319 case nir_deref_type_array_wildcard
:
322 case nir_deref_type_struct
:
323 if (b
->strct
.index
!= d
->strct
.index
)
328 unreachable("Invalid deref type in a path");
332 /* If we got here without failing, we've matched. However, it isn't an
333 * array match unless we found an altered array index.
335 return *path_array_idx
> 0;
339 handle_read(nir_deref_instr
*src
, struct match_state
*state
)
341 /* We only need to create an entry for sources that might be used to form
342 * an array copy. Hence no indirects or indexing into a vector.
344 if (nir_deref_instr_has_indirect(src
) ||
345 nir_deref_instr_is_known_out_of_bounds(src
) ||
346 (src
->deref_type
== nir_deref_type_array
&&
347 glsl_type_is_vector(nir_src_as_deref(src
->parent
)->type
)))
350 nir_deref_path src_path
;
351 nir_deref_path_init(&src_path
, src
, state
->dead_ctx
);
353 /* Create a node for this source if it doesn't exist. The point of this is
354 * to know which nodes aliasing a given store we actually need to care
355 * about, to avoid creating an excessive amount of nodes.
357 node_for_path(&src_path
, state
);
360 /* The core implementation, which is used for both copies and writes. Return
361 * true if a copy is created.
364 handle_write(nir_deref_instr
*dst
, nir_deref_instr
*src
,
365 unsigned write_index
, unsigned read_index
,
366 struct match_state
*state
)
368 nir_builder
*b
= &state
->builder
;
370 nir_deref_path dst_path
;
371 nir_deref_path_init(&dst_path
, dst
, state
->dead_ctx
);
374 for (nir_deref_instr
**instr
= dst_path
.path
; *instr
; instr
++, idx
++) {
375 if ((*instr
)->deref_type
!= nir_deref_type_array
)
378 /* Get the entry where the index is replaced by a wildcard, so that we
379 * hopefully can keep matching an array copy.
381 struct match_node
*dst_node
=
382 node_for_path_with_wildcard(&dst_path
, idx
, state
);
387 if (nir_src_as_uint((*instr
)->arr
.index
) != dst_node
->next_array_idx
)
390 if (dst_node
->next_array_idx
== 0) {
391 /* At this point there may be multiple source indices which are zero,
392 * so we can't pin down the actual source index. Just store it and
395 nir_deref_path_init(&dst_node
->first_src_path
, src
, state
->dead_ctx
);
397 nir_deref_path src_path
;
398 nir_deref_path_init(&src_path
, src
, state
->dead_ctx
);
399 bool result
= try_match_deref(&dst_node
->first_src_path
,
400 &dst_node
->src_wildcard_idx
,
401 &src_path
, dst_node
->next_array_idx
);
402 nir_deref_path_finish(&src_path
);
407 /* Check if an aliasing write clobbered the array after the last normal
408 * write. For example, with a sequence like this:
410 * dst[0][*] = src[0][*];
411 * dst[0][0] = 0; // invalidates the array copy dst[*][*] = src[*][*]
412 * dst[1][*] = src[1][*];
414 * Note that the second write wouldn't reset the entry for dst[*][*]
415 * by itself, but it'll be caught by this check when processing the
418 if (dst_node
->last_successful_write
< dst_node
->last_overwritten
)
421 dst_node
->last_successful_write
= write_index
;
423 /* In this case we've successfully processed an array element. Check if
424 * this is the last, so that we can emit an array copy.
426 dst_node
->next_array_idx
++;
427 dst_node
->first_src_read
= MIN2(dst_node
->first_src_read
, read_index
);
428 if (dst_node
->next_array_idx
> 1 &&
429 dst_node
->next_array_idx
== glsl_get_length((*(instr
- 1))->type
)) {
430 /* Make sure that nothing was overwritten. */
431 struct match_node
*src_node
=
432 node_for_path_with_wildcard(&dst_node
->first_src_path
,
433 dst_node
->src_wildcard_idx
,
436 if (src_node
->last_overwritten
<= dst_node
->first_src_read
) {
437 nir_copy_deref(b
, build_wildcard_deref(b
, &dst_path
, idx
),
438 build_wildcard_deref(b
, &dst_node
->first_src_path
,
439 dst_node
->src_wildcard_idx
));
440 foreach_aliasing_node(&dst_path
, clobber
, state
);
448 dst_node
->next_array_idx
= 0;
449 dst_node
->src_wildcard_idx
= -1;
450 dst_node
->last_successful_write
= 0;
451 dst_node
->first_src_read
= UINT32_MAX
;
454 /* Mark everything aliasing dst_path as clobbered. This needs to happen
455 * last since in the loop above we need to know what last clobbered
456 * dst_node and this overwrites that.
458 foreach_aliasing_node(&dst_path
, clobber
, state
);
464 opt_find_array_copies_block(nir_builder
*b
, nir_block
*block
,
465 struct match_state
*state
)
467 bool progress
= false;
469 unsigned next_index
= 0;
471 _mesa_hash_table_clear(state
->table
, NULL
);
473 nir_foreach_instr(instr
, block
) {
474 if (instr
->type
!= nir_instr_type_intrinsic
)
477 /* Index the instructions before we do anything else. */
478 instr
->index
= next_index
++;
480 /* Save the index of this instruction */
481 state
->cur_instr
= instr
->index
;
483 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
485 if (intrin
->intrinsic
== nir_intrinsic_load_deref
) {
486 handle_read(nir_src_as_deref(intrin
->src
[0]), state
);
490 if (intrin
->intrinsic
!= nir_intrinsic_copy_deref
&&
491 intrin
->intrinsic
!= nir_intrinsic_store_deref
)
494 nir_deref_instr
*dst_deref
= nir_src_as_deref(intrin
->src
[0]);
496 /* The destination must be local. If we see a non-local store, we
497 * continue on because it won't affect local stores or read-only
500 if (dst_deref
->mode
!= nir_var_function_temp
)
503 /* If there are any known out-of-bounds writes, then we can just skip
504 * this write as it's undefined and won't contribute to building up an
505 * array copy anyways.
507 if (nir_deref_instr_is_known_out_of_bounds(dst_deref
))
510 nir_deref_instr
*src_deref
;
511 unsigned load_index
= 0;
512 if (intrin
->intrinsic
== nir_intrinsic_copy_deref
) {
513 src_deref
= nir_src_as_deref(intrin
->src
[1]);
514 load_index
= intrin
->instr
.index
;
516 assert(intrin
->intrinsic
== nir_intrinsic_store_deref
);
517 nir_intrinsic_instr
*load
= nir_src_as_intrinsic(intrin
->src
[1]);
518 if (load
== NULL
|| load
->intrinsic
!= nir_intrinsic_load_deref
) {
521 src_deref
= nir_src_as_deref(load
->src
[0]);
522 load_index
= load
->instr
.index
;
525 if (nir_intrinsic_write_mask(intrin
) !=
526 (1 << glsl_get_components(dst_deref
->type
)) - 1) {
531 /* The source must be either local or something that's guaranteed to be
534 const nir_variable_mode read_only_modes
=
535 nir_var_shader_in
| nir_var_uniform
| nir_var_system_value
;
537 !(src_deref
->mode
& (nir_var_function_temp
| read_only_modes
))) {
541 /* There must be no indirects in the source or destination and no known
542 * out-of-bounds accesses in the source, and the copy must be fully
543 * qualified, or else we can't build up the array copy. We handled
544 * out-of-bounds accesses to the dest above. The types must match, since
545 * copy_deref currently can't bitcast mismatched deref types.
548 (nir_deref_instr_has_indirect(src_deref
) ||
549 nir_deref_instr_is_known_out_of_bounds(src_deref
) ||
550 nir_deref_instr_has_indirect(dst_deref
) ||
551 !glsl_type_is_vector_or_scalar(src_deref
->type
) ||
552 glsl_get_bare_type(src_deref
->type
) !=
553 glsl_get_bare_type(dst_deref
->type
))) {
557 state
->builder
.cursor
= nir_after_instr(instr
);
558 progress
|= handle_write(dst_deref
, src_deref
, instr
->index
,
566 opt_find_array_copies_impl(nir_function_impl
*impl
)
569 nir_builder_init(&b
, impl
);
571 bool progress
= false;
573 struct match_state s
;
574 s
.dead_ctx
= ralloc_context(NULL
);
575 s
.table
= _mesa_pointer_hash_table_create(s
.dead_ctx
);
576 nir_builder_init(&s
.builder
, impl
);
578 nir_foreach_block(block
, impl
) {
579 if (opt_find_array_copies_block(&b
, block
, &s
))
583 ralloc_free(s
.dead_ctx
);
586 nir_metadata_preserve(impl
, nir_metadata_block_index
|
587 nir_metadata_dominance
);
594 * This peephole optimization looks for a series of load/store_deref or
595 * copy_deref instructions that copy an array from one variable to another and
596 * turns it into a copy_deref that copies the entire array. The pattern it
597 * looks for is extremely specific but it's good enough to pick up on the
598 * input array copies in DXVK and should also be able to pick up the sequence
599 * generated by spirv_to_nir for a OpLoad of a large composite followed by
602 * TODO: Support out-of-order copies.
605 nir_opt_find_array_copies(nir_shader
*shader
)
607 bool progress
= false;
609 nir_foreach_function(function
, shader
) {
610 if (function
->impl
&& opt_find_array_copies_impl(function
->impl
))