/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
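/* A sketch of the rewrite this pass performs (see also the block comment
 * above nir_opt_find_array_copies() at the end of this file):
 *
 *    dst[0] = src[0];
 *    dst[1] = src[1];
 *    dst[2] = src[2];
 *
 * becomes a single whole-array copy using wildcard derefs:
 *
 *    dst[*] = src[*];
 */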
struct match_node {
   /* Note: these fields are only valid for leaf nodes */

   unsigned next_array_idx;
   int src_wildcard_idx;
   nir_deref_path first_src_path;

   /* The index of the first read of the source path that's part of the copy
    * we're matching. If the last write to the source path is after this, we
    * would get a different result from reading it at the end and we can't
    * emit the copy.
    */
   unsigned first_src_read;

   /* The last time there was a write to this node. */
   unsigned last_overwritten;

   /* The last time there was a write to this node which successfully advanced
    * next_array_idx. This helps us catch any intervening aliased writes.
    */
   unsigned last_successful_write;

   unsigned num_children;
   struct match_node *children[];
};

struct match_state {
   /* Map from nir_variable * -> match_node */
   struct hash_table *table;

   unsigned cur_instr;

   nir_builder builder;

   void *dead_ctx;
};
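/* Allocates a zeroed match node with one child slot per array element (plus
 * one extra slot for the wildcard) or per struct member.
 */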
static struct match_node *
create_match_node(const struct glsl_type *type, struct match_state *state)
{
   unsigned num_children = 0;
   if (glsl_type_is_array_or_matrix(type)) {
      /* One for wildcards */
      num_children = glsl_get_length(type) + 1;
   } else if (glsl_type_is_struct_or_ifc(type)) {
      num_children = glsl_get_length(type);
   }

   struct match_node *node = rzalloc_size(state->dead_ctx,
                                          sizeof(struct match_node) +
                                          num_children * sizeof(struct match_node *));
   node->num_children = num_children;
   node->src_wildcard_idx = -1;
   node->first_src_read = UINT32_MAX;
   return node;
}
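/* Walks one deref instruction down the match tree, creating nodes on demand.
 * Non-constant array indices share the wildcard slot at the end of the
 * children array.
 */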
static struct match_node *
node_for_deref(nir_deref_instr *instr, struct match_node *parent,
               struct match_state *state)
{
   unsigned idx;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      struct hash_entry *entry =
         _mesa_hash_table_search(state->table, instr->var);
      if (entry)
         return entry->data;

      struct match_node *node = create_match_node(instr->type, state);
      _mesa_hash_table_insert(state->table, instr->var, node);
      return node;
   }

   case nir_deref_type_array_wildcard:
      idx = parent->num_children - 1;
      break;

   case nir_deref_type_array:
      if (nir_src_is_const(instr->arr.index)) {
         idx = nir_src_as_uint(instr->arr.index);
         assert(idx < parent->num_children - 1);
      } else {
         idx = parent->num_children - 1;
      }
      break;

   case nir_deref_type_struct:
      idx = instr->strct.index;
      break;

   default:
      unreachable("bad deref type");
   }

   assert(idx < parent->num_children);
   if (parent->children[idx]) {
      return parent->children[idx];
   } else {
      struct match_node *node = create_match_node(instr->type, state);
      parent->children[idx] = node;
      return node;
   }
}
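/* Returns the wildcard child of parent, creating it if needed. */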
static struct match_node *
node_for_wildcard(const struct glsl_type *type, struct match_node *parent,
                  struct match_state *state)
{
   assert(glsl_type_is_array_or_matrix(type));
   unsigned idx = glsl_get_length(type);

   if (parent->children[idx]) {
      return parent->children[idx];
   } else {
      struct match_node *node =
         create_match_node(glsl_get_array_element(type), state);
      parent->children[idx] = node;
      return node;
   }
}
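/* Returns the leaf match node for a full deref path, creating any missing
 * nodes along the way.
 */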
static struct match_node *
node_for_path(nir_deref_path *path, struct match_state *state)
{
   struct match_node *node = NULL;
   for (nir_deref_instr **instr = path->path; *instr; instr++)
      node = node_for_deref(*instr, node, state);

   return node;
}
static struct match_node *
node_for_path_with_wildcard(nir_deref_path *path, unsigned wildcard_idx,
                            struct match_state *state)
{
   struct match_node *node = NULL;
   unsigned idx = 0;
   for (nir_deref_instr **instr = path->path; *instr; instr++, idx++) {
      if (idx == wildcard_idx)
         node = node_for_wildcard((*(instr - 1))->type, node, state);
      else
         node = node_for_deref(*instr, node, state);
   }

   return node;
}
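/* Walks the match tree alongside the deref chain and invokes cb on every
 * node the chain could refer to.  Wildcards and non-constant indices fan out
 * to all children; constant indices also visit the wildcard entry.
 */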
typedef void (*match_cb)(struct match_node *, struct match_state *);

static void
_foreach_aliasing(nir_deref_instr **deref, match_cb cb,
                  struct match_node *node, struct match_state *state)
{
   if (*deref == NULL) {
      cb(node, state);
      return;
   }

   switch ((*deref)->deref_type) {
   case nir_deref_type_struct: {
      struct match_node *child = node->children[(*deref)->strct.index];
      if (child)
         _foreach_aliasing(deref + 1, cb, child, state);
      return;
   }

   case nir_deref_type_array:
   case nir_deref_type_array_wildcard: {
      if ((*deref)->deref_type == nir_deref_type_array_wildcard ||
          !nir_src_is_const((*deref)->arr.index)) {
         /* This access may touch any index, so we have to visit all of
          * them.
          */
         for (unsigned i = 0; i < node->num_children; i++) {
            if (node->children[i])
               _foreach_aliasing(deref + 1, cb, node->children[i], state);
         }
      } else {
         /* Visit the wildcard entry if any */
         if (node->children[node->num_children - 1]) {
            _foreach_aliasing(deref + 1, cb,
                              node->children[node->num_children - 1], state);
         }

         unsigned index = nir_src_as_uint((*deref)->arr.index);
         /* Check that the index is in-bounds */
         if (index < node->num_children - 1 && node->children[index])
            _foreach_aliasing(deref + 1, cb, node->children[index], state);
      }
      return;
   }

   default:
      unreachable("bad deref type");
   }
}
/* Given a deref path, find all the leaf deref nodes that alias it. */
static void
foreach_aliasing_node(nir_deref_path *path,
                      match_cb cb,
                      struct match_state *state)
{
   assert(path->path[0]->deref_type == nir_deref_type_var);
   struct hash_entry *entry = _mesa_hash_table_search(state->table,
                                                      path->path[0]->var);
   if (entry)
      _foreach_aliasing(&path->path[1], cb, entry->data, state);
}
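/* Rebuilds the tail of a deref path with the array index at wildcard_idx
 * replaced by an array wildcard.
 */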
static nir_deref_instr *
build_wildcard_deref(nir_builder *b, nir_deref_path *path,
                     unsigned wildcard_idx)
{
   assert(path->path[wildcard_idx]->deref_type == nir_deref_type_array);

   nir_deref_instr *tail =
      nir_build_deref_array_wildcard(b, path->path[wildcard_idx - 1]);

   for (unsigned i = wildcard_idx + 1; path->path[i]; i++)
      tail = nir_build_deref_follower(b, tail, path->path[i]);

   return tail;
}
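/* match_cb used with foreach_aliasing_node() to record the current
 * instruction as the last write to a node.
 */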
static void
clobber(struct match_node *node, struct match_state *state)
{
   node->last_overwritten = state->cur_instr;
}
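/* Compares the source path of the current element copy against the source
 * path recorded for element 0.  The paths match if they are identical except
 * for a single array index that is 0 in the base path and arr_idx in the new
 * path; that index becomes (or must equal) *path_array_idx.
 */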
static bool
try_match_deref(nir_deref_path *base_path, int *path_array_idx,
                nir_deref_path *deref_path, int arr_idx,
                nir_deref_instr *dst)
{
   for (int i = 0; ; i++) {
      nir_deref_instr *b = base_path->path[i];
      nir_deref_instr *d = deref_path->path[i];
      /* They have to be the same length */
      if ((b == NULL) != (d == NULL))
         return false;

      if (b == NULL)
         break;

      /* This can happen if one is a deref_array and the other a wildcard */
      if (b->deref_type != d->deref_type)
         return false;

      switch (b->deref_type) {
      case nir_deref_type_var:
         if (b->var != d->var)
            return false;
         continue;

      case nir_deref_type_array: {
         assert(b->arr.index.is_ssa && d->arr.index.is_ssa);
         const bool const_b_idx = nir_src_is_const(b->arr.index);
         const bool const_d_idx = nir_src_is_const(d->arr.index);
         const unsigned b_idx = const_b_idx ? nir_src_as_uint(b->arr.index) : 0;
         const unsigned d_idx = const_d_idx ? nir_src_as_uint(d->arr.index) : 0;

         /* If we don't have an index into the path yet or if this entry in
          * the path is at the array index, see if this is a candidate.  We're
          * looking for an index which is zero in the base deref and arr_idx
          * in the search deref and has a matching array size.
          */
         if ((*path_array_idx < 0 || *path_array_idx == i) &&
             const_b_idx && b_idx == 0 &&
             const_d_idx && d_idx == arr_idx &&
             glsl_get_length(nir_deref_instr_parent(b)->type) ==
             glsl_get_length(nir_deref_instr_parent(dst)->type)) {
            *path_array_idx = i;
            continue;
         }

         /* We're at the array index but not a candidate */
         if (*path_array_idx == i)
            return false;

         /* If we're not the path array index, we must match exactly.  We
          * could probably just compare SSA values and trust in copy
          * propagation but doing it ourselves means this pass can run a bit
          * earlier.
          */
         if (b->arr.index.ssa == d->arr.index.ssa ||
             (const_b_idx && const_d_idx && b_idx == d_idx))
            continue;

         return false;
      }

      case nir_deref_type_array_wildcard:
         continue;

      case nir_deref_type_struct:
         if (b->strct.index != d->strct.index)
            return false;
         continue;

      default:
         unreachable("Invalid deref type in a path");
      }
   }

   /* If we got here without failing, we've matched.  However, it isn't an
    * array match unless we found an altered array index.
    */
   return *path_array_idx > 0;
}
static void
handle_read(nir_deref_instr *src, struct match_state *state)
{
   /* We only need to create an entry for sources that might be used to form
    * an array copy.  Hence no indirects or indexing into a vector.
    */
   if (nir_deref_instr_has_indirect(src) ||
       nir_deref_instr_is_known_out_of_bounds(src) ||
       (src->deref_type == nir_deref_type_array &&
        glsl_type_is_vector(nir_src_as_deref(src->parent)->type)))
      return;

   nir_deref_path src_path;
   nir_deref_path_init(&src_path, src, state->dead_ctx);

   /* Create a node for this source if it doesn't exist. The point of this is
    * to know which nodes aliasing a given store we actually need to care
    * about, to avoid creating an excessive amount of nodes.
    */
   node_for_path(&src_path, state);
}
/* The core implementation, which is used for both copies and writes. Return
 * true if a copy is created.
 */
static bool
handle_write(nir_deref_instr *dst, nir_deref_instr *src,
             unsigned write_index, unsigned read_index,
             struct match_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_path dst_path;
   nir_deref_path_init(&dst_path, dst, state->dead_ctx);

   unsigned idx = 0;
   for (nir_deref_instr **instr = dst_path.path; *instr; instr++, idx++) {
      if ((*instr)->deref_type != nir_deref_type_array)
         continue;

      /* Get the entry where the index is replaced by a wildcard, so that we
       * hopefully can keep matching an array copy.
       */
      struct match_node *dst_node =
         node_for_path_with_wildcard(&dst_path, idx, state);

      if (!src)
         goto reset;

      if (nir_src_as_uint((*instr)->arr.index) != dst_node->next_array_idx)
         goto reset;

      if (dst_node->next_array_idx == 0) {
         /* At this point there may be multiple source indices which are zero,
          * so we can't pin down the actual source index. Just store it and
          * move on.
          */
         nir_deref_path_init(&dst_node->first_src_path, src, state->dead_ctx);
      } else {
         nir_deref_path src_path;
         nir_deref_path_init(&src_path, src, state->dead_ctx);
         bool result = try_match_deref(&dst_node->first_src_path,
                                       &dst_node->src_wildcard_idx,
                                       &src_path, dst_node->next_array_idx,
                                       *instr);
         nir_deref_path_finish(&src_path);
         if (!result)
            goto reset;
      }

      /* Check if an aliasing write clobbered the array after the last normal
       * write. For example, with a sequence like this:
       *
       *    dst[0][*] = src[0][*];
       *    dst[0][0] = 0; // invalidates the array copy dst[*][*] = src[*][*]
       *    dst[1][*] = src[1][*];
       *
       * Note that the second write wouldn't reset the entry for dst[*][*]
       * by itself, but it'll be caught by this check when processing the
       * third copy.
       */
      if (dst_node->last_successful_write < dst_node->last_overwritten)
         goto reset;

      dst_node->last_successful_write = write_index;

      /* In this case we've successfully processed an array element. Check if
       * this is the last, so that we can emit an array copy.
       */
      dst_node->next_array_idx++;
      dst_node->first_src_read = MIN2(dst_node->first_src_read, read_index);
      if (dst_node->next_array_idx > 1 &&
          dst_node->next_array_idx == glsl_get_length((*(instr - 1))->type)) {
         /* Make sure that nothing was overwritten. */
         struct match_node *src_node =
            node_for_path_with_wildcard(&dst_node->first_src_path,
                                        dst_node->src_wildcard_idx,
                                        state);

         if (src_node->last_overwritten <= dst_node->first_src_read) {
            nir_copy_deref(b, build_wildcard_deref(b, &dst_path, idx),
                           build_wildcard_deref(b, &dst_node->first_src_path,
                                                dst_node->src_wildcard_idx));
            foreach_aliasing_node(&dst_path, clobber, state);
            return true;
         }
      } else {
         continue;
      }

   reset:
      dst_node->next_array_idx = 0;
      dst_node->src_wildcard_idx = -1;
      dst_node->last_successful_write = 0;
      dst_node->first_src_read = UINT32_MAX;
   }

   /* Mark everything aliasing dst_path as clobbered. This needs to happen
    * last since in the loop above we need to know what last clobbered
    * dst_node and this overwrites that.
    */
   foreach_aliasing_node(&dst_path, clobber, state);

   return false;
}
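/* Processes a single block: indexes the instructions, feeds loads into
 * handle_read() and stores/copies into handle_write().  The match table is
 * cleared first because matching never crosses block boundaries.
 */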
static bool
opt_find_array_copies_block(nir_builder *b, nir_block *block,
                            struct match_state *state)
{
   bool progress = false;

   unsigned next_index = 0;

   _mesa_hash_table_clear(state->table, NULL);

   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      /* Index the instructions before we do anything else. */
      instr->index = next_index++;

      /* Save the index of this instruction */
      state->cur_instr = instr->index;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_load_deref) {
         handle_read(nir_src_as_deref(intrin->src[0]), state);
         continue;
      }

      if (intrin->intrinsic != nir_intrinsic_copy_deref &&
          intrin->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *dst_deref = nir_src_as_deref(intrin->src[0]);

      /* The destination must be local. If we see a non-local store, we
       * continue on because it won't affect local stores or read-only
       * things.
       */
      if (dst_deref->mode != nir_var_function_temp)
         continue;

      /* If there are any known out-of-bounds writes, then we can just skip
       * this write as it's undefined and won't contribute to building up an
       * array copy anyways.
       */
      if (nir_deref_instr_is_known_out_of_bounds(dst_deref))
         continue;

      nir_deref_instr *src_deref;
      unsigned load_index = 0;
      if (intrin->intrinsic == nir_intrinsic_copy_deref) {
         src_deref = nir_src_as_deref(intrin->src[1]);
         load_index = intrin->instr.index;
      } else {
         assert(intrin->intrinsic == nir_intrinsic_store_deref);
         nir_intrinsic_instr *load = nir_src_as_intrinsic(intrin->src[1]);
         if (load == NULL || load->intrinsic != nir_intrinsic_load_deref) {
            src_deref = NULL;
         } else {
            src_deref = nir_src_as_deref(load->src[0]);
            load_index = load->instr.index;
         }

         if (nir_intrinsic_write_mask(intrin) !=
             (1 << glsl_get_components(dst_deref->type)) - 1) {
            src_deref = NULL;
         }
      }

      /* The source must be either local or something that's guaranteed to be
       * read-only.
       */
      const nir_variable_mode read_only_modes =
         nir_var_shader_in | nir_var_uniform | nir_var_system_value;
      if (src_deref &&
          !(src_deref->mode & (nir_var_function_temp | read_only_modes))) {
         src_deref = NULL;
      }

      /* There must be no indirects in the source or destination and no known
       * out-of-bounds accesses in the source, and the copy must be fully
       * qualified, or else we can't build up the array copy. We handled
       * out-of-bounds accesses to the dest above. The types must match, since
       * copy_deref currently can't bitcast mismatched deref types.
       */
      if (src_deref &&
          (nir_deref_instr_has_indirect(src_deref) ||
           nir_deref_instr_is_known_out_of_bounds(src_deref) ||
           nir_deref_instr_has_indirect(dst_deref) ||
           !glsl_type_is_vector_or_scalar(src_deref->type) ||
           glsl_get_bare_type(src_deref->type) !=
           glsl_get_bare_type(dst_deref->type))) {
         src_deref = NULL;
      }

      state->builder.cursor = nir_after_instr(instr);
      progress |= handle_write(dst_deref, src_deref, instr->index,
                               load_index, state);
   }

   return progress;
}
static bool
opt_find_array_copies_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool progress = false;

   struct match_state s;
   s.dead_ctx = ralloc_context(NULL);
   s.table = _mesa_pointer_hash_table_create(s.dead_ctx);
   nir_builder_init(&s.builder, impl);

   nir_foreach_block(block, impl) {
      if (opt_find_array_copies_block(&b, block, &s))
         progress = true;
   }

   ralloc_free(s.dead_ctx);

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}
/*
 * This peephole optimization looks for a series of load/store_deref or
 * copy_deref instructions that copy an array from one variable to another and
 * turns it into a copy_deref that copies the entire array.  The pattern it
 * looks for is extremely specific but it's good enough to pick up on the
 * input array copies in DXVK and should also be able to pick up the sequence
 * generated by spirv_to_nir for an OpLoad of a large composite followed by
 * OpStore.
 *
 * TODO: Support out-of-order copies.
 */
bool
nir_opt_find_array_copies(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl && opt_find_array_copies_impl(function->impl))
         progress = true;
   }

   return progress;
}