/* src/compiler/nir/nir_opt_find_array_copies.c */
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

struct match_node {
   /* Note: these fields are only valid for leaf nodes */

   unsigned next_array_idx;
   int src_wildcard_idx;
   nir_deref_path first_src_path;

   /* The index of the first read of the source path that's part of the copy
    * we're matching. If the last write to the source path is after this, we
    * would get a different result from reading it at the end and we can't
    * emit the copy.
    */
   unsigned first_src_read;

   /* The last time there was a write to this node. */
   unsigned last_overwritten;

   /* The last time there was a write to this node which successfully advanced
    * next_array_idx. This helps us catch any intervening aliased writes.
    */
   unsigned last_successful_write;

   unsigned num_children;
   struct match_node *children[];
};
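
/* Illustrative sketch of the tree create_match_node() builds (the variable
 * below is hypothetical). For a variable such as
 *
 *    float m[4][2];
 *
 * the root node for m gets glsl_get_length(type) + 1 == 5 children: one per
 * constant index 0..3 plus a trailing slot shared by wildcard and indirect
 * accesses. Each of those children in turn gets 3 children for the inner
 * float[2] level. Struct types get exactly one child per member and no
 * wildcard slot.
 */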

struct match_state {
   /* Map from nir_variable * -> match_node */
   struct hash_table *table;

   unsigned cur_instr;

   nir_builder builder;

   void *dead_ctx;
};

static struct match_node *
create_match_node(const struct glsl_type *type, struct match_state *state)
{
   unsigned num_children = 0;
   if (glsl_type_is_array_or_matrix(type)) {
      /* One for wildcards */
      num_children = glsl_get_length(type) + 1;
   } else if (glsl_type_is_struct_or_ifc(type)) {
      num_children = glsl_get_length(type);
   }

   struct match_node *node = rzalloc_size(state->dead_ctx,
                                          sizeof(struct match_node) +
                                          num_children * sizeof(struct match_node *));
   node->num_children = num_children;
   node->src_wildcard_idx = -1;
   node->first_src_read = UINT32_MAX;
   return node;
}

static struct match_node *
node_for_deref(nir_deref_instr *instr, struct match_node *parent,
               struct match_state *state)
{
   unsigned idx;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      struct hash_entry *entry = _mesa_hash_table_search(state->table, instr->var);
      if (entry) {
         return entry->data;
      } else {
         struct match_node *node = create_match_node(instr->type, state);
         _mesa_hash_table_insert(state->table, instr->var, node);
         return node;
      }
   }

   case nir_deref_type_array_wildcard:
      idx = parent->num_children - 1;
      break;

   case nir_deref_type_array:
      if (nir_src_is_const(instr->arr.index)) {
         idx = nir_src_as_uint(instr->arr.index);
         assert(idx < parent->num_children - 1);
      } else {
         idx = parent->num_children - 1;
      }
      break;

   case nir_deref_type_struct:
      idx = instr->strct.index;
      break;

   default:
      unreachable("bad deref type");
   }

   assert(idx < parent->num_children);
   if (parent->children[idx]) {
      return parent->children[idx];
   } else {
      struct match_node *node = create_match_node(instr->type, state);
      parent->children[idx] = node;
      return node;
   }
}

static struct match_node *
node_for_wildcard(const struct glsl_type *type, struct match_node *parent,
                  struct match_state *state)
{
   assert(glsl_type_is_array_or_matrix(type));
   unsigned idx = glsl_get_length(type);

   if (parent->children[idx]) {
      return parent->children[idx];
   } else {
      struct match_node *node =
         create_match_node(glsl_get_array_element(type), state);
      parent->children[idx] = node;
      return node;
   }
}

static struct match_node *
node_for_path(nir_deref_path *path, struct match_state *state)
{
   struct match_node *node = NULL;
   for (nir_deref_instr **instr = path->path; *instr; instr++)
      node = node_for_deref(*instr, node, state);

   return node;
}

static struct match_node *
node_for_path_with_wildcard(nir_deref_path *path, unsigned wildcard_idx,
                            struct match_state *state)
{
   struct match_node *node = NULL;
   unsigned idx = 0;
   for (nir_deref_instr **instr = path->path; *instr; instr++, idx++) {
      if (idx == wildcard_idx)
         node = node_for_wildcard((*(instr - 1))->type, node, state);
      else
         node = node_for_deref(*instr, node, state);
   }

   return node;
}
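
/* For example (hypothetical path): given the deref path for dst[2] and
 * wildcard_idx == 1 (the position of the array deref), this returns the node
 * for dst[*], i.e. the slot that stands for "any index" at that array level,
 * creating it if it doesn't exist yet.
 */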

typedef void (*match_cb)(struct match_node *, struct match_state *);

static void
_foreach_aliasing(nir_deref_instr **deref, match_cb cb,
                  struct match_node *node, struct match_state *state)
{
   if (*deref == NULL) {
      cb(node, state);
      return;
   }

   switch ((*deref)->deref_type) {
   case nir_deref_type_struct: {
      struct match_node *child = node->children[(*deref)->strct.index];
      if (child)
         _foreach_aliasing(deref + 1, cb, child, state);
      return;
   }

   case nir_deref_type_array:
   case nir_deref_type_array_wildcard: {
      if ((*deref)->deref_type == nir_deref_type_array_wildcard ||
          !nir_src_is_const((*deref)->arr.index)) {
         /* This access may touch any index, so we have to visit all of
          * them.
          */
         for (unsigned i = 0; i < node->num_children; i++) {
            if (node->children[i])
               _foreach_aliasing(deref + 1, cb, node->children[i], state);
         }
      } else {
         /* Visit the wildcard entry if any */
         if (node->children[node->num_children - 1]) {
            _foreach_aliasing(deref + 1, cb,
                              node->children[node->num_children - 1], state);
         }

         unsigned index = nir_src_as_uint((*deref)->arr.index);
         /* Check that the index is in-bounds */
         if (index < node->num_children - 1 && node->children[index])
            _foreach_aliasing(deref + 1, cb, node->children[index], state);
      }
      return;
   }

   default:
      unreachable("bad deref type");
   }
}

/* Given a deref path, find all the leaf deref nodes that alias it. */

static void
foreach_aliasing_node(nir_deref_path *path,
                      match_cb cb,
                      struct match_state *state)
{
   assert(path->path[0]->deref_type == nir_deref_type_var);
   struct hash_entry *entry = _mesa_hash_table_search(state->table,
                                                      path->path[0]->var);
   if (entry)
      _foreach_aliasing(&path->path[1], cb, entry->data, state);
}
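
/* As a concrete (hypothetical) example: for a write to dst[2], this visits
 * the node for dst[2] and the wildcard node dst[*], when those nodes exist;
 * for a write to dst[i] with a non-constant index, or to dst[*], it visits
 * every existing child, since such an access may touch any element.
 */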

static nir_deref_instr *
build_wildcard_deref(nir_builder *b, nir_deref_path *path,
                     unsigned wildcard_idx)
{
   assert(path->path[wildcard_idx]->deref_type == nir_deref_type_array);

   nir_deref_instr *tail =
      nir_build_deref_array_wildcard(b, path->path[wildcard_idx - 1]);

   for (unsigned i = wildcard_idx + 1; path->path[i]; i++)
      tail = nir_build_deref_follower(b, tail, path->path[i]);

   return tail;
}
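
/* E.g. (hypothetical path): for dst[3].member with wildcard_idx pointing at
 * the array deref, this rebuilds the chain as dst[*].member, reusing the
 * original derefs up to the wildcard and cloning the remainder with
 * nir_build_deref_follower().
 */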

static void
clobber(struct match_node *node, struct match_state *state)
{
   node->last_overwritten = state->cur_instr;
}

static bool
try_match_deref(nir_deref_path *base_path, int *path_array_idx,
                nir_deref_path *deref_path, int arr_idx,
                nir_deref_instr *dst)
{
   for (int i = 0; ; i++) {
      nir_deref_instr *b = base_path->path[i];
      nir_deref_instr *d = deref_path->path[i];
      /* They have to be the same length */
      if ((b == NULL) != (d == NULL))
         return false;

      if (b == NULL)
         break;

      /* This can happen if one is a deref_array and the other a wildcard */
      if (b->deref_type != d->deref_type)
         return false;

      switch (b->deref_type) {
      case nir_deref_type_var:
         if (b->var != d->var)
            return false;
         continue;

      case nir_deref_type_array:
         assert(b->arr.index.is_ssa && d->arr.index.is_ssa);
         const bool const_b_idx = nir_src_is_const(b->arr.index);
         const bool const_d_idx = nir_src_is_const(d->arr.index);
         const unsigned b_idx = const_b_idx ? nir_src_as_uint(b->arr.index) : 0;
         const unsigned d_idx = const_d_idx ? nir_src_as_uint(d->arr.index) : 0;

         /* If we don't have an index into the path yet or if this entry in
          * the path is at the array index, see if this is a candidate. We're
          * looking for an index which is zero in the base deref and arr_idx
          * in the search deref and has a matching array size.
          */
         if ((*path_array_idx < 0 || *path_array_idx == i) &&
             const_b_idx && b_idx == 0 &&
             const_d_idx && d_idx == arr_idx &&
             glsl_get_length(nir_deref_instr_parent(b)->type) ==
             glsl_get_length(nir_deref_instr_parent(dst)->type)) {
            *path_array_idx = i;
            continue;
         }

         /* We're at the array index but not a candidate */
         if (*path_array_idx == i)
            return false;

         /* If we're not the path array index, we must match exactly. We
          * could probably just compare SSA values and trust in copy
          * propagation but doing it ourselves means this pass can run a bit
          * earlier.
          */
         if (b->arr.index.ssa == d->arr.index.ssa ||
             (const_b_idx && const_d_idx && b_idx == d_idx))
            continue;

         return false;

      case nir_deref_type_array_wildcard:
         continue;

      case nir_deref_type_struct:
         if (b->strct.index != d->strct.index)
            return false;
         continue;

      default:
         unreachable("Invalid deref type in a path");
      }
   }

   /* If we got here without failing, we've matched. However, it isn't an
    * array match unless we found an altered array index.
    */
   return *path_array_idx > 0;
}
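
/* A worked (hypothetical) example: if the first element copy read src[0].c
 * and the current one reads src[2].c with arr_idx == 2, the paths match and
 * *path_array_idx is set to the position of that array deref in the path.
 * Every later element copy then has to use the same position, with a
 * constant index equal to the running arr_idx, for the match to survive.
 */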

static void
handle_read(nir_deref_instr *src, struct match_state *state)
{
   /* We only need to create an entry for sources that might be used to form
    * an array copy. Hence no indirects or indexing into a vector.
    */
   if (nir_deref_instr_has_indirect(src) ||
       nir_deref_instr_is_known_out_of_bounds(src) ||
       (src->deref_type == nir_deref_type_array &&
        glsl_type_is_vector(nir_src_as_deref(src->parent)->type)))
      return;

   nir_deref_path src_path;
   nir_deref_path_init(&src_path, src, state->dead_ctx);

   /* Create a node for this source if it doesn't exist. The point of this is
    * to know which nodes aliasing a given store we actually need to care
    * about, to avoid creating an excessive number of nodes.
    */
   node_for_path(&src_path, state);
}
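
/* In other words, a load like "x = src[0]" (names hypothetical) makes sure
 * the tree has entries for src and src[0]. Since clobber() is only applied
 * to nodes that already exist, reads have to register the paths they care
 * about up front for later aliasing writes to be recorded against them.
 */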

/* The core implementation, which is used for both copies and writes. Return
 * true if a copy is created.
 */
static bool
handle_write(nir_deref_instr *dst, nir_deref_instr *src,
             unsigned write_index, unsigned read_index,
             struct match_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_path dst_path;
   nir_deref_path_init(&dst_path, dst, state->dead_ctx);

   unsigned idx = 0;
   for (nir_deref_instr **instr = dst_path.path; *instr; instr++, idx++) {
      if ((*instr)->deref_type != nir_deref_type_array)
         continue;

      /* Get the entry where the index is replaced by a wildcard, so that we
       * hopefully can keep matching an array copy.
       */
      struct match_node *dst_node =
         node_for_path_with_wildcard(&dst_path, idx, state);

      if (!src)
         goto reset;

      if (nir_src_as_uint((*instr)->arr.index) != dst_node->next_array_idx)
         goto reset;

      if (dst_node->next_array_idx == 0) {
         /* At this point there may be multiple source indices which are zero,
          * so we can't pin down the actual source index. Just store it and
          * move on.
          */
         nir_deref_path_init(&dst_node->first_src_path, src, state->dead_ctx);
      } else {
         nir_deref_path src_path;
         nir_deref_path_init(&src_path, src, state->dead_ctx);
         bool result = try_match_deref(&dst_node->first_src_path,
                                       &dst_node->src_wildcard_idx,
                                       &src_path, dst_node->next_array_idx,
                                       *instr);
         nir_deref_path_finish(&src_path);
         if (!result)
            goto reset;
      }

      /* Check if an aliasing write clobbered the array after the last normal
       * write. For example, with a sequence like this:
       *
       *    dst[0][*] = src[0][*];
       *    dst[0][0] = 0; // invalidates the array copy dst[*][*] = src[*][*]
       *    dst[1][*] = src[1][*];
       *
       * Note that the second write wouldn't reset the entry for dst[*][*]
       * by itself, but it'll be caught by this check when processing the
       * third copy.
       */
      if (dst_node->last_successful_write < dst_node->last_overwritten)
         goto reset;

      dst_node->last_successful_write = write_index;

      /* In this case we've successfully processed an array element. Check if
       * this is the last, so that we can emit an array copy.
       */
      dst_node->next_array_idx++;
      dst_node->first_src_read = MIN2(dst_node->first_src_read, read_index);
      if (dst_node->next_array_idx > 1 &&
          dst_node->next_array_idx == glsl_get_length((*(instr - 1))->type)) {
         /* Make sure that nothing was overwritten. */
         struct match_node *src_node =
            node_for_path_with_wildcard(&dst_node->first_src_path,
                                        dst_node->src_wildcard_idx,
                                        state);

         if (src_node->last_overwritten <= dst_node->first_src_read) {
            nir_copy_deref(b, build_wildcard_deref(b, &dst_path, idx),
                           build_wildcard_deref(b, &dst_node->first_src_path,
                                                dst_node->src_wildcard_idx));
            foreach_aliasing_node(&dst_path, clobber, state);
            return true;
         }
      } else {
         continue;
      }

   reset:
      dst_node->next_array_idx = 0;
      dst_node->src_wildcard_idx = -1;
      dst_node->last_successful_write = 0;
      dst_node->first_src_read = UINT32_MAX;
   }

   /* Mark everything aliasing dst_path as clobbered. This needs to happen
    * last since in the loop above we need to know what last clobbered
    * dst_node and this overwrites that.
    */
   foreach_aliasing_node(&dst_path, clobber, state);

   return false;
}
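
/* The first_src_read / last_overwritten bookkeeping matters for sequences
 * like this one (hypothetical, with two-element arrays):
 *
 *    dst[0] = src[0];
 *    src[0] = 5;       // src[0] changes after it was read
 *    dst[1] = src[1];
 *
 * A whole-array dst[*] = src[*] emitted after the last store would copy the
 * new src[0] rather than the value the original code read, so the final
 * src_node->last_overwritten <= dst_node->first_src_read check rejects it.
 */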

static bool
opt_find_array_copies_block(nir_builder *b, nir_block *block,
                            struct match_state *state)
{
   bool progress = false;

   unsigned next_index = 0;

   _mesa_hash_table_clear(state->table, NULL);

   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      /* Index the instructions before we do anything else. */
      instr->index = next_index++;

      /* Save the index of this instruction */
      state->cur_instr = instr->index;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_load_deref) {
         handle_read(nir_src_as_deref(intrin->src[0]), state);
         continue;
      }

      if (intrin->intrinsic != nir_intrinsic_copy_deref &&
          intrin->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *dst_deref = nir_src_as_deref(intrin->src[0]);

      /* The destination must be local. If we see a non-local store, we
       * continue on because it won't affect local stores or read-only
       * variables.
       */
      if (dst_deref->mode != nir_var_function_temp)
         continue;

      /* If there are any known out-of-bounds writes, then we can just skip
       * this write as it's undefined and won't contribute to building up an
       * array copy anyways.
       */
      if (nir_deref_instr_is_known_out_of_bounds(dst_deref))
         continue;

      nir_deref_instr *src_deref;
      unsigned load_index = 0;
      if (intrin->intrinsic == nir_intrinsic_copy_deref) {
         src_deref = nir_src_as_deref(intrin->src[1]);
         load_index = intrin->instr.index;
      } else {
         assert(intrin->intrinsic == nir_intrinsic_store_deref);
         nir_intrinsic_instr *load = nir_src_as_intrinsic(intrin->src[1]);
         if (load == NULL || load->intrinsic != nir_intrinsic_load_deref) {
            src_deref = NULL;
         } else {
            src_deref = nir_src_as_deref(load->src[0]);
            load_index = load->instr.index;
         }

         if (nir_intrinsic_write_mask(intrin) !=
             (1 << glsl_get_components(dst_deref->type)) - 1) {
            src_deref = NULL;
         }
      }

      /* The source must be either local or something that's guaranteed to be
       * read-only.
       */
      const nir_variable_mode read_only_modes =
         nir_var_shader_in | nir_var_uniform | nir_var_system_value;
      if (src_deref &&
          !(src_deref->mode & (nir_var_function_temp | read_only_modes))) {
         src_deref = NULL;
      }

      /* There must be no indirects in the source or destination and no known
       * out-of-bounds accesses in the source, and the copy must be fully
       * qualified, or else we can't build up the array copy. We handled
       * out-of-bounds accesses to the dest above. The types must match, since
       * copy_deref currently can't bitcast mismatched deref types.
       */
      if (src_deref &&
          (nir_deref_instr_has_indirect(src_deref) ||
           nir_deref_instr_is_known_out_of_bounds(src_deref) ||
           nir_deref_instr_has_indirect(dst_deref) ||
           !glsl_type_is_vector_or_scalar(src_deref->type) ||
           glsl_get_bare_type(src_deref->type) !=
           glsl_get_bare_type(dst_deref->type))) {
         src_deref = NULL;
      }

      state->builder.cursor = nir_after_instr(instr);
      progress |= handle_write(dst_deref, src_deref, instr->index,
                               load_index, state);
   }

   return progress;
}

static bool
opt_find_array_copies_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool progress = false;

   struct match_state s;
   s.dead_ctx = ralloc_context(NULL);
   s.table = _mesa_pointer_hash_table_create(s.dead_ctx);
   nir_builder_init(&s.builder, impl);

   nir_foreach_block(block, impl) {
      if (opt_find_array_copies_block(&b, block, &s))
         progress = true;
   }

   ralloc_free(s.dead_ctx);

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}

/**
 * This peephole optimization looks for a series of load/store_deref or
 * copy_deref instructions that copy an array from one variable to another and
 * turns it into a copy_deref that copies the entire array. The pattern it
 * looks for is extremely specific but it's good enough to pick up on the
 * input array copies in DXVK and should also be able to pick up the sequence
 * generated by spirv_to_nir for an OpLoad of a large composite followed by
 * OpStore.
 *
 * TODO: Support out-of-order copies.
 */
bool
nir_opt_find_array_copies(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl && opt_find_array_copies_impl(function->impl))
         progress = true;
   }

   return progress;
}