/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

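/* Each tracked variable gets a tree of match_node structures mirroring its
 * type: one child per array element or struct member, plus one extra child
 * per array level for wildcard and indirect accesses.  The fields below that
 * are only valid on leaf nodes carry the state needed to recognize an
 * element-by-element array copy as it is built up write by write.
 */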
struct match_node {
   /* Note: these fields are only valid for leaf nodes */

   /* Number of array elements matched so far; equivalently, the constant
    * index we expect the next element-wise write to use.
    */
   unsigned next_array_idx;

   /* Index into first_src_path at which the array index varies between
    * elements, i.e. where the wildcard goes when we emit the copy, or -1 if
    * we haven't identified it yet.
    */
   int src_wildcard_idx;

   /* Deref path of the source of the first (index 0) element copy.  Later
    * element sources are matched against this path.
    */
   nir_deref_path first_src_path;

   /* The index of the first read of the source path that's part of the copy
    * we're matching. If the last write to the source path is after this, we
    * would get a different result from reading it at the end and we can't
    * emit the copy.
    */
   unsigned first_src_read;

   /* The last time there was a write to this node. */
   unsigned last_overwritten;

   /* The last time there was a write to this node which successfully advanced
    * next_array_idx. This helps us catch any intervening aliased writes.
    */
   unsigned last_successful_write;

   unsigned num_children;
   struct match_node *children[];
};

struct match_state {
   /* Map from nir_variable * -> match_node */
   struct hash_table *table;

   /* Index of the instruction currently being processed (counted per block). */
   unsigned cur_instr;

   /* Builder used to emit the array copies we find. */
   nir_builder builder;

   /* ralloc context owning the table, match nodes, and deref paths. */
   void *dead_ctx;
};

static struct match_node *
create_match_node(const struct glsl_type *type, struct match_state *state)
{
   unsigned num_children = 0;
   if (glsl_type_is_array_or_matrix(type)) {
      /* One for wildcards */
      num_children = glsl_get_length(type) + 1;
   } else if (glsl_type_is_struct_or_ifc(type)) {
      num_children = glsl_get_length(type);
   }

   struct match_node *node = rzalloc_size(state->dead_ctx,
                                          sizeof(struct match_node) +
                                          num_children * sizeof(struct match_node *));
   node->num_children = num_children;
   node->src_wildcard_idx = -1;
   node->first_src_read = UINT32_MAX;
   return node;
}

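/* Look up (or lazily create) the match node for a single deref level.  Array
 * derefs with a non-constant index share the parent's wildcard child.
 */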
static struct match_node *
node_for_deref(nir_deref_instr *instr, struct match_node *parent,
               struct match_state *state)
{
   unsigned idx;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      struct hash_entry *entry = _mesa_hash_table_search(state->table, instr->var);
      if (entry) {
         return entry->data;
      } else {
         struct match_node *node = create_match_node(instr->type, state);
         _mesa_hash_table_insert(state->table, instr->var, node);
         return node;
      }
   }

   case nir_deref_type_array_wildcard:
      idx = parent->num_children - 1;
      break;

   case nir_deref_type_array:
      if (nir_src_is_const(instr->arr.index)) {
         idx = nir_src_as_uint(instr->arr.index);
         assert(idx < parent->num_children - 1);
      } else {
         idx = parent->num_children - 1;
      }
      break;

   case nir_deref_type_struct:
      idx = instr->strct.index;
      break;

   default:
      unreachable("bad deref type");
   }

   assert(idx < parent->num_children);
   if (parent->children[idx]) {
      return parent->children[idx];
   } else {
      struct match_node *node = create_match_node(instr->type, state);
      parent->children[idx] = node;
      return node;
   }
}

static struct match_node *
node_for_wildcard(const struct glsl_type *type, struct match_node *parent,
                  struct match_state *state)
{
   assert(glsl_type_is_array_or_matrix(type));
   unsigned idx = glsl_get_length(type);

   if (parent->children[idx]) {
      return parent->children[idx];
   } else {
      struct match_node *node =
         create_match_node(glsl_get_array_element(type), state);
      parent->children[idx] = node;
      return node;
   }
}

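/* Walk a full deref path from the variable down, creating match nodes as
 * needed, and return the node for the complete path.
 */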
static struct match_node *
node_for_path(nir_deref_path *path, struct match_state *state)
{
   struct match_node *node = NULL;
   for (nir_deref_instr **instr = path->path; *instr; instr++)
      node = node_for_deref(*instr, node, state);

   return node;
}

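/* Same as node_for_path(), except that the deref at wildcard_idx is replaced
 * by the parent array's wildcard child regardless of its actual index.
 */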
static struct match_node *
node_for_path_with_wildcard(nir_deref_path *path, unsigned wildcard_idx,
                            struct match_state *state)
{
   struct match_node *node = NULL;
   unsigned idx = 0;
   for (nir_deref_instr **instr = path->path; *instr; instr++, idx++) {
      if (idx == wildcard_idx)
         node = node_for_wildcard((*(instr - 1))->type, node, state);
      else
         node = node_for_deref(*instr, node, state);
   }

   return node;
}

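/* Callback invoked on each match node that may alias the deref path being
 * walked (see foreach_aliasing_node() below).
 */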
typedef void (*match_cb)(struct match_node *, struct match_state *);

static void
_foreach_aliasing(nir_deref_instr **deref, match_cb cb,
                  struct match_node *node, struct match_state *state)
{
   if (*deref == NULL) {
      cb(node, state);
      return;
   }

   switch ((*deref)->deref_type) {
   case nir_deref_type_struct: {
      struct match_node *child = node->children[(*deref)->strct.index];
      if (child)
         _foreach_aliasing(deref + 1, cb, child, state);
      return;
   }

   case nir_deref_type_array:
   case nir_deref_type_array_wildcard: {
      if ((*deref)->deref_type == nir_deref_type_array_wildcard ||
          !nir_src_is_const((*deref)->arr.index)) {
         /* This access may touch any index, so we have to visit all of
          * them.
          */
         for (unsigned i = 0; i < node->num_children; i++) {
            if (node->children[i])
               _foreach_aliasing(deref + 1, cb, node->children[i], state);
         }
      } else {
         /* Visit the wildcard entry if any */
         if (node->children[node->num_children - 1]) {
            _foreach_aliasing(deref + 1, cb,
                              node->children[node->num_children - 1], state);
         }

         unsigned index = nir_src_as_uint((*deref)->arr.index);
         /* Check that the index is in-bounds */
         if (index < node->num_children - 1 && node->children[index])
            _foreach_aliasing(deref + 1, cb, node->children[index], state);
      }
      return;
   }

   default:
      unreachable("bad deref type");
   }
}

/* Given a deref path, visit the match nodes that may alias it and invoke the
 * given callback on each of them.
 */

static void
foreach_aliasing_node(nir_deref_path *path,
                      match_cb cb,
                      struct match_state *state)
{
   assert(path->path[0]->deref_type == nir_deref_type_var);
   struct hash_entry *entry = _mesa_hash_table_search(state->table,
                                                      path->path[0]->var);
   if (entry)
      _foreach_aliasing(&path->path[1], cb, entry->data, state);
}

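/* Rebuild the tail of a deref path with the array deref at wildcard_idx
 * replaced by an array wildcard, so the whole array can be copied at once.
 */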
static nir_deref_instr *
build_wildcard_deref(nir_builder *b, nir_deref_path *path,
                     unsigned wildcard_idx)
{
   assert(path->path[wildcard_idx]->deref_type == nir_deref_type_array);

   nir_deref_instr *tail =
      nir_build_deref_array_wildcard(b, path->path[wildcard_idx - 1]);

   for (unsigned i = wildcard_idx + 1; path->path[i]; i++)
      tail = nir_build_deref_follower(b, tail, path->path[i]);

   return tail;
}

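/* match_cb that records a (potential) write to a node at the current
 * instruction index.
 */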
static void
clobber(struct match_node *node, struct match_state *state)
{
   node->last_overwritten = state->cur_instr;
}

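/* Check whether deref_path looks like element arr_idx of the same array copy
 * whose element 0 came from base_path.  For example, if the first copy read
 * src[0].f and this one reads src[3].f with arr_idx == 3, the paths match
 * and *path_array_idx records which entry in the path holds the varying
 * array index.
 */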
static bool
try_match_deref(nir_deref_path *base_path, int *path_array_idx,
                nir_deref_path *deref_path, int arr_idx)
{
   for (int i = 0; ; i++) {
      nir_deref_instr *b = base_path->path[i];
      nir_deref_instr *d = deref_path->path[i];
      /* They have to be the same length */
      if ((b == NULL) != (d == NULL))
         return false;

      if (b == NULL)
         break;

      /* This can happen if one is a deref_array and the other a wildcard */
      if (b->deref_type != d->deref_type)
         return false;

      switch (b->deref_type) {
      case nir_deref_type_var:
         if (b->var != d->var)
            return false;
         continue;

      case nir_deref_type_array:
         assert(b->arr.index.is_ssa && d->arr.index.is_ssa);
         const bool const_b_idx = nir_src_is_const(b->arr.index);
         const bool const_d_idx = nir_src_is_const(d->arr.index);
         const unsigned b_idx = const_b_idx ? nir_src_as_uint(b->arr.index) : 0;
         const unsigned d_idx = const_d_idx ? nir_src_as_uint(d->arr.index) : 0;

         /* If we don't have an index into the path yet or if this entry in
          * the path is at the array index, see if this is a candidate. We're
          * looking for an index which is zero in the base deref and arr_idx
          * in the search deref.
          */
         if ((*path_array_idx < 0 || *path_array_idx == i) &&
             const_b_idx && b_idx == 0 &&
             const_d_idx && d_idx == arr_idx) {
            *path_array_idx = i;
            continue;
         }

         /* We're at the array index but not a candidate */
         if (*path_array_idx == i)
            return false;

         /* If we're not the path array index, we must match exactly. We
          * could probably just compare SSA values and trust in copy
          * propagation but doing it ourselves means this pass can run a bit
          * earlier.
          */
         if (b->arr.index.ssa == d->arr.index.ssa ||
             (const_b_idx && const_d_idx && b_idx == d_idx))
            continue;

         return false;

      case nir_deref_type_array_wildcard:
         continue;

      case nir_deref_type_struct:
         if (b->strct.index != d->strct.index)
            return false;
         continue;

      default:
         unreachable("Invalid deref type in a path");
      }
   }

   /* If we got here without failing, we've matched. However, it isn't an
    * array match unless we found an altered array index.
    */
   return *path_array_idx > 0;
}

static void
handle_read(nir_deref_instr *src, struct match_state *state)
{
   /* We only need to create an entry for sources that might be used to form
    * an array copy. Hence no indirects or indexing into a vector.
    */
   if (nir_deref_instr_has_indirect(src) ||
       nir_deref_instr_is_known_out_of_bounds(src) ||
       (src->deref_type == nir_deref_type_array &&
        glsl_type_is_vector(nir_src_as_deref(src->parent)->type)))
      return;

   nir_deref_path src_path;
   nir_deref_path_init(&src_path, src, state->dead_ctx);

   /* Create a node for this source if it doesn't exist. The point of this is
    * to know which nodes aliasing a given store we actually need to care
    * about, to avoid creating an excessive amount of nodes.
    */
   node_for_path(&src_path, state);
}

/* The core implementation, which is used for both copies and writes. Return
 * true if a copy is created.
 */
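/* For example, given the sequence
 *
 *    dst[0] = src[0];
 *    dst[1] = src[1];
 *    ...
 *    dst[n-1] = src[n-1];
 *
 * each store advances next_array_idx on dst's match node, and once the last
 * element of the array has been written we emit a single
 *
 *    copy_deref(dst[*], src[*])
 *
 * right after the final store.
 */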
static bool
handle_write(nir_deref_instr *dst, nir_deref_instr *src,
             unsigned write_index, unsigned read_index,
             struct match_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_path dst_path;
   nir_deref_path_init(&dst_path, dst, state->dead_ctx);

   unsigned idx = 0;
   for (nir_deref_instr **instr = dst_path.path; *instr; instr++, idx++) {
      if ((*instr)->deref_type != nir_deref_type_array)
         continue;

      /* Get the entry where the index is replaced by a wildcard, so that we
       * hopefully can keep matching an array copy.
       */
      struct match_node *dst_node =
         node_for_path_with_wildcard(&dst_path, idx, state);

      if (!src)
         goto reset;

      if (nir_src_as_uint((*instr)->arr.index) != dst_node->next_array_idx)
         goto reset;

      if (dst_node->next_array_idx == 0) {
         /* At this point there may be multiple source indices which are zero,
          * so we can't pin down the actual source index. Just store it and
          * move on.
          */
         nir_deref_path_init(&dst_node->first_src_path, src, state->dead_ctx);
      } else {
         nir_deref_path src_path;
         nir_deref_path_init(&src_path, src, state->dead_ctx);
         bool result = try_match_deref(&dst_node->first_src_path,
                                       &dst_node->src_wildcard_idx,
                                       &src_path, dst_node->next_array_idx);
         nir_deref_path_finish(&src_path);
         if (!result)
            goto reset;
      }

      /* Check if an aliasing write clobbered the array after the last normal
       * write. For example, with a sequence like this:
       *
       * dst[0][*] = src[0][*];
       * dst[0][0] = 0; // invalidates the array copy dst[*][*] = src[*][*]
       * dst[1][*] = src[1][*];
       *
       * Note that the second write wouldn't reset the entry for dst[*][*]
       * by itself, but it'll be caught by this check when processing the
       * third copy.
       */
      if (dst_node->last_successful_write < dst_node->last_overwritten)
         goto reset;

      dst_node->last_successful_write = write_index;

      /* In this case we've successfully processed an array element. Check if
       * this is the last, so that we can emit an array copy.
       */
      dst_node->next_array_idx++;
      dst_node->first_src_read = MIN2(dst_node->first_src_read, read_index);
      if (dst_node->next_array_idx > 1 &&
          dst_node->next_array_idx == glsl_get_length((*(instr - 1))->type)) {
         /* Make sure that nothing was overwritten. */
         struct match_node *src_node =
            node_for_path_with_wildcard(&dst_node->first_src_path,
                                        dst_node->src_wildcard_idx,
                                        state);

         if (src_node->last_overwritten <= dst_node->first_src_read) {
            nir_copy_deref(b, build_wildcard_deref(b, &dst_path, idx),
                           build_wildcard_deref(b, &dst_node->first_src_path,
                                                dst_node->src_wildcard_idx));
            foreach_aliasing_node(&dst_path, clobber, state);
            return true;
         }
      } else {
         continue;
      }

   reset:
      dst_node->next_array_idx = 0;
      dst_node->src_wildcard_idx = -1;
      dst_node->last_successful_write = 0;
      dst_node->first_src_read = UINT32_MAX;
   }

   /* Mark everything aliasing dst_path as clobbered. This needs to happen
    * last since in the loop above we need to know what last clobbered
    * dst_node and this overwrites that.
    */
   foreach_aliasing_node(&dst_path, clobber, state);

   return false;
}

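/* Walk a single basic block, feeding every load, store, and copy into the
 * matcher.  The match table is cleared per block, so an array copy is only
 * recognized when all of its element writes live in the same block.
 */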
static bool
opt_find_array_copies_block(nir_builder *b, nir_block *block,
                            struct match_state *state)
{
   bool progress = false;

   unsigned next_index = 0;

   _mesa_hash_table_clear(state->table, NULL);

   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      /* Index the instructions before we do anything else. */
      instr->index = next_index++;

      /* Save the index of this instruction */
      state->cur_instr = instr->index;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_load_deref) {
         handle_read(nir_src_as_deref(intrin->src[0]), state);
         continue;
      }

      if (intrin->intrinsic != nir_intrinsic_copy_deref &&
          intrin->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *dst_deref = nir_src_as_deref(intrin->src[0]);

      /* The destination must be local. If we see a non-local store, we
       * continue on because it won't affect local stores or read-only
       * variables.
       */
      if (dst_deref->mode != nir_var_function_temp)
         continue;

      /* If there are any known out-of-bounds writes, then we can just skip
       * this write as it's undefined and won't contribute to building up an
       * array copy anyways.
       */
      if (nir_deref_instr_is_known_out_of_bounds(dst_deref))
         continue;

      nir_deref_instr *src_deref;
      unsigned load_index = 0;
      if (intrin->intrinsic == nir_intrinsic_copy_deref) {
         src_deref = nir_src_as_deref(intrin->src[1]);
         load_index = intrin->instr.index;
      } else {
         assert(intrin->intrinsic == nir_intrinsic_store_deref);
         nir_intrinsic_instr *load = nir_src_as_intrinsic(intrin->src[1]);
         if (load == NULL || load->intrinsic != nir_intrinsic_load_deref) {
            src_deref = NULL;
         } else {
            src_deref = nir_src_as_deref(load->src[0]);
            load_index = load->instr.index;
         }

         if (nir_intrinsic_write_mask(intrin) !=
             (1 << glsl_get_components(dst_deref->type)) - 1) {
            src_deref = NULL;
         }
      }

      /* The source must be either local or something that's guaranteed to be
       * read-only.
       */
      const nir_variable_mode read_only_modes =
         nir_var_shader_in | nir_var_uniform | nir_var_system_value;
      if (src_deref &&
          !(src_deref->mode & (nir_var_function_temp | read_only_modes))) {
         src_deref = NULL;
      }

      /* There must be no indirects in the source or destination and no known
       * out-of-bounds accesses in the source, and the copy must be fully
       * qualified, or else we can't build up the array copy. We handled
       * out-of-bounds accesses to the dest above. The types must match, since
       * copy_deref currently can't bitcast mismatched deref types.
       */
      if (src_deref &&
          (nir_deref_instr_has_indirect(src_deref) ||
           nir_deref_instr_is_known_out_of_bounds(src_deref) ||
           nir_deref_instr_has_indirect(dst_deref) ||
           !glsl_type_is_vector_or_scalar(src_deref->type) ||
           glsl_get_bare_type(src_deref->type) !=
           glsl_get_bare_type(dst_deref->type))) {
         src_deref = NULL;
      }

      state->builder.cursor = nir_after_instr(instr);
      progress |= handle_write(dst_deref, src_deref, instr->index,
                               load_index, state);
   }

   return progress;
}

static bool
opt_find_array_copies_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool progress = false;

   struct match_state s;
   s.dead_ctx = ralloc_context(NULL);
   s.table = _mesa_pointer_hash_table_create(s.dead_ctx);
   nir_builder_init(&s.builder, impl);

   nir_foreach_block(block, impl) {
      if (opt_find_array_copies_block(&b, block, &s))
         progress = true;
   }

   ralloc_free(s.dead_ctx);

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}

/**
 * This peephole optimization looks for a series of load/store_deref or
 * copy_deref instructions that copy an array from one variable to another and
 * turns it into a copy_deref that copies the entire array. The pattern it
 * looks for is extremely specific but it's good enough to pick up on the
 * input array copies in DXVK and should also be able to pick up the sequence
 * generated by spirv_to_nir for an OpLoad of a large composite followed by
 * OpStore.
 *
 * TODO: Support out-of-order copies.
 */
bool
nir_opt_find_array_copies(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl && opt_find_array_copies_impl(function->impl))
         progress = true;
   }

   return progress;
}