nir: properly find the entry to keep in copy_prop_vars
mesa.git: src/compiler/nir/nir_opt_copy_prop_vars.c
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

#include "util/bitscan.h"
#include "util/u_dynarray.h"

/**
 * Variable-based copy propagation
 *
 * Normally, NIR trusts in SSA form for most of its copy-propagation needs.
 * However, there are cases, especially when dealing with indirects, where SSA
 * won't help you. This pass is for those times. Specifically, it handles
 * the following things that the rest of NIR can't:
 *
 * 1) Copy-propagation on variables that have indirect access. This includes
 *    propagating from indirect stores into indirect loads.
 *
 * 2) Removal of redundant load_deref intrinsics. We can't trust regular CSE
 *    to do this because it isn't aware of variable writes that may alias the
 *    value and make the former load invalid.
 *
 * This pass sits between a purely local / "per-block" approach and a complete
 * data-flow analysis. It walks the control flow graph, propagates the
 * available copy information forward, and invalidates data at each cf_node.
 *
 * Removal of dead writes to variables is handled by another pass.
 */

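/* Illustrative sketch (added; not taken from the pass itself, the SSA names
 * and the variable are made up) of case 2 above: a repeated load_deref with
 * no aliasing write in between can simply reuse the earlier value.
 *
 *    vec4 ssa_1 = intrinsic load_deref (ssa_0)   // &var
 *    ... no store/copy that may alias &var ...
 *    vec4 ssa_2 = intrinsic load_deref (ssa_0)   // redundant; uses of ssa_2
 *                                                // can be rewritten to ssa_1
 */
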
struct vars_written {
   nir_variable_mode modes;

   /* Key is the deref and value is a uintptr_t holding the write mask. */
   struct hash_table *derefs;
};

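/* Descriptive note (added): a value tracked for a copy entry. When is_ssa is
 * set, the value is known per component as up to four SSA defs (a component
 * may be NULL if it is not known); otherwise the value is known only as the
 * contents of another deref.
 */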
struct value {
   bool is_ssa;
   union {
      nir_ssa_def *ssa[4];
      nir_deref_instr *deref;
   };
};

struct copy_entry {
   struct value src;

   nir_deref_instr *dst;
};

struct copy_prop_var_state {
   nir_function_impl *impl;

   void *mem_ctx;
   void *lin_ctx;

   /* Maps nodes to vars_written. Used to invalidate copy entries when
    * visiting each node.
    */
   struct hash_table *vars_written_map;

   bool progress;
};

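/* Descriptive note (added): returns true when every component selected by the
 * store's write mask already holds the SSA value being stored, i.e. the store
 * would not change anything we are tracking.
 */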
static bool
value_equals_store_src(struct value *value, nir_intrinsic_instr *intrin)
{
   assert(intrin->intrinsic == nir_intrinsic_store_deref);
   uintptr_t write_mask = nir_intrinsic_write_mask(intrin);

   for (unsigned i = 0; i < intrin->num_components; i++) {
      if ((write_mask & (1 << i)) &&
          value->ssa[i] != intrin->src[1].ssa)
         return false;
   }

   return true;
}

static struct vars_written *
create_vars_written(struct copy_prop_var_state *state)
{
   struct vars_written *written =
      linear_zalloc_child(state->lin_ctx, sizeof(struct vars_written));
   written->derefs = _mesa_hash_table_create(state->mem_ctx, _mesa_hash_pointer,
                                             _mesa_key_pointer_equal);
   return written;
}

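/* Descriptive note (added): recursively records, for each if and loop node,
 * which modes and which derefs may be written anywhere inside it, so that the
 * second pass can invalidate copies when it reaches that node. Write masks
 * for the same deref are OR'd together; e.g. a store with mask 0x1 in the
 * then-branch and one with mask 0x2 in the else-branch are recorded for the
 * enclosing if as mask 0x3 (the masks here are illustrative).
 */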
static void
gather_vars_written(struct copy_prop_var_state *state,
                    struct vars_written *written,
                    nir_cf_node *cf_node)
{
   struct vars_written *new_written = NULL;

   switch (cf_node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(cf_node);
      foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
         gather_vars_written(state, NULL, cf_node);
      break;
   }

   case nir_cf_node_block: {
      if (!written)
         break;

      nir_block *block = nir_cf_node_as_block(cf_node);
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_call) {
            written->modes |= nir_var_shader_out |
                              nir_var_global |
                              nir_var_local |
                              nir_var_shader_storage |
                              nir_var_shared;
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_barrier:
         case nir_intrinsic_memory_barrier:
            written->modes |= nir_var_shader_out |
                              nir_var_shader_storage |
                              nir_var_shared;
            break;

         case nir_intrinsic_emit_vertex:
         case nir_intrinsic_emit_vertex_with_counter:
            written->modes |= nir_var_shader_out;
            break;

         case nir_intrinsic_store_deref:
         case nir_intrinsic_copy_deref: {
            /* Destination in _both_ store_deref and copy_deref is src[0]. */
            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);

            uintptr_t mask = intrin->intrinsic == nir_intrinsic_store_deref ?
               nir_intrinsic_write_mask(intrin) : (1 << glsl_get_vector_elements(dst->type)) - 1;

            struct hash_entry *ht_entry = _mesa_hash_table_search(written->derefs, dst);
            if (ht_entry)
               ht_entry->data = (void *)(mask | (uintptr_t)ht_entry->data);
            else
               _mesa_hash_table_insert(written->derefs, dst, (void *)mask);

            break;
         }

         default:
            break;
         }
      }

      break;
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(cf_node);

      new_written = create_vars_written(state);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
         gather_vars_written(state, new_written, cf_node);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
         gather_vars_written(state, new_written, cf_node);

      break;
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(cf_node);

      new_written = create_vars_written(state);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
         gather_vars_written(state, new_written, cf_node);

      break;
   }

   default:
      unreachable("Invalid CF node type");
   }

   if (new_written) {
      /* Merge new information into the parent control flow node. */
      if (written) {
         written->modes |= new_written->modes;
         hash_table_foreach(new_written->derefs, new_entry) {
            struct hash_entry *old_entry =
               _mesa_hash_table_search_pre_hashed(written->derefs, new_entry->hash,
                                                  new_entry->key);
            if (old_entry) {
               nir_component_mask_t merged = (uintptr_t) new_entry->data |
                                             (uintptr_t) old_entry->data;
               old_entry->data = (void *) ((uintptr_t) merged);
            } else {
               _mesa_hash_table_insert_pre_hashed(written->derefs, new_entry->hash,
                                                  new_entry->key, new_entry->data);
            }
         }
      }
      _mesa_hash_table_insert(state->vars_written_map, cf_node, new_written);
   }
}

static struct copy_entry *
copy_entry_create(struct util_dynarray *copies,
                  nir_deref_instr *dst_deref)
{
   struct copy_entry new_entry = {
      .dst = dst_deref,
   };
   util_dynarray_append(copies, struct copy_entry, new_entry);
   return util_dynarray_top_ptr(copies, struct copy_entry);
}

/* Remove a copy entry by swapping it with the last element and reducing the
 * size. If used inside an iteration over copies, it must be a reverse
 * (backwards) iteration. It is safe to use in those cases because the swap
 * will not affect the rest of the iteration.
 */
static void
copy_entry_remove(struct util_dynarray *copies,
                  struct copy_entry *entry)
{
   /* This also works when removing the last element, since pop doesn't shrink
    * the memory used by the array, so the swap is useless but not invalid.
    */
   *entry = util_dynarray_pop(copies, struct copy_entry);
}
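
/* Added usage sketch (illustrative; it mirrors how the helpers below use it,
 * and no_longer_valid() is a hypothetical predicate):
 *
 *    util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
 *       if (no_longer_valid(iter))
 *          copy_entry_remove(copies, iter);
 *    }
 */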

static struct copy_entry *
lookup_entry_for_deref(struct util_dynarray *copies,
                       nir_deref_instr *deref,
                       nir_deref_compare_result allowed_comparisons)
{
   util_dynarray_foreach(copies, struct copy_entry, iter) {
      if (nir_compare_derefs(iter->dst, deref) & allowed_comparisons)
         return iter;
   }

   return NULL;
}

static struct copy_entry *
lookup_entry_and_kill_aliases(struct util_dynarray *copies,
                              nir_deref_instr *deref,
                              unsigned write_mask)
{
   /* TODO: Take into account the write_mask. */

   nir_deref_instr *dst_match = NULL;
   util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
      if (!iter->src.is_ssa) {
         /* If this write aliases the source of some entry, get rid of it */
         if (nir_compare_derefs(iter->src.deref, deref) & nir_derefs_may_alias_bit) {
            copy_entry_remove(copies, iter);
            continue;
         }
      }

      nir_deref_compare_result comp = nir_compare_derefs(iter->dst, deref);

      if (comp & nir_derefs_equal_bit) {
         /* Removing entries invalidates previous iter pointers, so we'll
          * collect the matching entry later. Just make sure it is unique.
          */
         assert(!dst_match);
         dst_match = iter->dst;
      } else if (comp & nir_derefs_may_alias_bit) {
         copy_entry_remove(copies, iter);
      }
   }

   struct copy_entry *entry = NULL;
   if (dst_match) {
      util_dynarray_foreach(copies, struct copy_entry, iter) {
         if (iter->dst == dst_match) {
            entry = iter;
            break;
         }
      }
      assert(entry);
   }
   return entry;
}
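
/* Added example for lookup_entry_and_kill_aliases (the derefs are
 * illustrative): with entries for tmp[1] and out.color, a write to tmp[i]
 * (indirect) may alias tmp[1], so that entry is removed while out.color is
 * untouched; a write to exactly tmp[1] instead returns the tmp[1] entry so
 * the caller can overwrite it in place.
 */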

static void
kill_aliases(struct util_dynarray *copies,
             nir_deref_instr *deref,
             unsigned write_mask)
{
   /* TODO: Take into account the write_mask. */

   struct copy_entry *entry =
      lookup_entry_and_kill_aliases(copies, deref, write_mask);
   if (entry)
      copy_entry_remove(copies, entry);
}

static struct copy_entry *
get_entry_and_kill_aliases(struct util_dynarray *copies,
                           nir_deref_instr *deref,
                           unsigned write_mask)
{
   /* TODO: Take into account the write_mask. */

   struct copy_entry *entry =
      lookup_entry_and_kill_aliases(copies, deref, write_mask);

   if (entry == NULL)
      entry = copy_entry_create(copies, deref);

   return entry;
}

static void
apply_barrier_for_modes(struct util_dynarray *copies,
                        nir_variable_mode modes)
{
   util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
      if ((iter->dst->mode & modes) ||
          (!iter->src.is_ssa && (iter->src.deref->mode & modes)))
         copy_entry_remove(copies, iter);
   }
}

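/* Descriptive note (added): record a new value for an entry. For an SSA
 * value, only the components selected by write_mask are replaced; e.g. with
 * write_mask 0x3 only .x and .y are overwritten and the other tracked
 * components are kept (the mask value is illustrative). A deref-based value
 * always replaces the whole entry.
 */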
static void
store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
               const struct value *value, unsigned write_mask)
{
   if (value->is_ssa) {
      /* Clear src if it was being used as non-SSA. */
      if (!entry->src.is_ssa)
         memset(entry->src.ssa, 0, sizeof(entry->src.ssa));
      entry->src.is_ssa = true;
      /* Only overwrite the written components */
      for (unsigned i = 0; i < 4; i++) {
         if (write_mask & (1 << i))
            entry->src.ssa[i] = value->ssa[i];
      }
   } else {
      /* Non-ssa stores always write everything */
      entry->src.is_ssa = false;
      entry->src.deref = value->deref;
   }
}

/* Do a "load" from an SSA-based entry and return it in "value" as a value
 * with a single SSA def. Because an entry could reference up to 4 different
 * SSA defs, a vecN operation may be inserted to combine them into a single
 * SSA def before handing it back to the caller. If the load instruction is
 * no longer needed, it is removed and nir_instr::block is set to NULL. (It
 * is possible, in some cases, for the load to be used in the vecN operation
 * in which case it isn't deleted.)
 */
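/* Added example (illustrative): if the entry knows .x and .y as ssa_1 but
 * .z/.w are unknown, the rebuilt value is roughly
 *
 *    vec4(ssa_1.x, ssa_1.y, load.z, load.w)
 *
 * where "load" is the original (or a freshly emitted) load_deref of the
 * entry's destination.
 */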
static bool
load_from_ssa_entry_value(struct copy_prop_var_state *state,
                          struct copy_entry *entry,
                          nir_builder *b, nir_intrinsic_instr *intrin,
                          struct value *value)
{
   *value = entry->src;
   assert(value->is_ssa);

   const struct glsl_type *type = entry->dst->type;
   unsigned num_components = glsl_get_vector_elements(type);

   nir_component_mask_t available = 0;
   bool all_same = true;
   for (unsigned i = 0; i < num_components; i++) {
      if (value->ssa[i])
         available |= (1 << i);

      if (value->ssa[i] != value->ssa[0])
         all_same = false;
   }

   if (all_same) {
      /* Our work here is done */
      b->cursor = nir_instr_remove(&intrin->instr);
      intrin->instr.block = NULL;
      return true;
   }

   if (available != (1 << num_components) - 1 &&
       intrin->intrinsic == nir_intrinsic_load_deref &&
       (available & nir_ssa_def_components_read(&intrin->dest.ssa)) == 0) {
      /* If none of the components read are available as SSA values, then we
       * should just bail. Otherwise, we would end up replacing the uses of
       * the load_deref with a vecN() that just gathers up its components.
       */
      return false;
   }

   b->cursor = nir_after_instr(&intrin->instr);

   nir_ssa_def *load_def =
      intrin->intrinsic == nir_intrinsic_load_deref ? &intrin->dest.ssa : NULL;

   bool keep_intrin = false;
   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < num_components; i++) {
      if (value->ssa[i]) {
         comps[i] = nir_channel(b, value->ssa[i], i);
      } else {
         /* We don't have anything for this component in our
          * list. Just re-use a channel from the load.
          */
         if (load_def == NULL)
            load_def = nir_load_deref(b, entry->dst);

         if (load_def->parent_instr == &intrin->instr)
            keep_intrin = true;

         comps[i] = nir_channel(b, load_def, i);
      }
   }

   nir_ssa_def *vec = nir_vec(b, comps, num_components);
   for (unsigned i = 0; i < num_components; i++)
      value->ssa[i] = vec;

   if (!keep_intrin) {
      /* Removing this instruction should not touch the cursor because we
       * created the cursor after the intrinsic and have added at least one
       * instruction (the vec) since then.
       */
      assert(b->cursor.instr != &intrin->instr);
      nir_instr_remove(&intrin->instr);
      intrin->instr.block = NULL;
   }

   return true;
}

/**
 * Specialize the wildcards in a deref chain
 *
 * This function returns a deref chain identical to \param deref except that
 * some of its wildcards are replaced with indices from \param specific. The
 * process is guided by \param guide which references the same type as \param
 * specific but has the same wildcard array lengths as \param deref.
 */
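/* Added example (illustrative names): given a recorded copy from in_array[*]
 * to tmp[*] and a later load of tmp[3], the source path in_array[*] ("deref")
 * is specialized using tmp[*] as the guide and tmp[3] as the specific path,
 * producing in_array[3].
 */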
static nir_deref_instr *
specialize_wildcards(nir_builder *b,
                     nir_deref_path *deref,
                     nir_deref_path *guide,
                     nir_deref_path *specific)
{
   nir_deref_instr **deref_p = &deref->path[1];
   nir_deref_instr **guide_p = &guide->path[1];
   nir_deref_instr **spec_p = &specific->path[1];
   nir_deref_instr *ret_tail = deref->path[0];
   for (; *deref_p; deref_p++) {
      if ((*deref_p)->deref_type == nir_deref_type_array_wildcard) {
         /* This is where things get tricky. We have to search through
          * the entry deref to find its corresponding wildcard and fill
          * this slot in with the value from the src.
          */
         while (*guide_p &&
                (*guide_p)->deref_type != nir_deref_type_array_wildcard) {
            guide_p++;
            spec_p++;
         }
         assert(*guide_p && *spec_p);

         ret_tail = nir_build_deref_follower(b, ret_tail, *spec_p);

         guide_p++;
         spec_p++;
      } else {
         ret_tail = nir_build_deref_follower(b, ret_tail, *deref_p);
      }
   }

   return ret_tail;
}

/* Do a "load" from a deref-based entry and return it in "value" as a value.
 * The deref returned in "value" will always be a fresh copy so the caller
 * can steal it and assign it to the instruction directly without copying it
 * again.
 */
static bool
load_from_deref_entry_value(struct copy_prop_var_state *state,
                            struct copy_entry *entry,
                            nir_builder *b, nir_intrinsic_instr *intrin,
                            nir_deref_instr *src, struct value *value)
{
   *value = entry->src;

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_deref_path entry_dst_path, src_path;
   nir_deref_path_init(&entry_dst_path, entry->dst, state->mem_ctx);
   nir_deref_path_init(&src_path, src, state->mem_ctx);

   bool need_to_specialize_wildcards = false;
   nir_deref_instr **entry_p = &entry_dst_path.path[1];
   nir_deref_instr **src_p = &src_path.path[1];
   while (*entry_p && *src_p) {
      nir_deref_instr *entry_tail = *entry_p++;
      nir_deref_instr *src_tail = *src_p++;

      if (src_tail->deref_type == nir_deref_type_array &&
          entry_tail->deref_type == nir_deref_type_array_wildcard)
         need_to_specialize_wildcards = true;
   }

   /* If the entry deref is longer than the source deref then it refers to a
    * smaller type and we can't source from it.
    */
   assert(*entry_p == NULL);

   if (need_to_specialize_wildcards) {
      /* The entry has some wildcards that are not in src. This means we need
       * to construct a new deref based on the entry but using the wildcards
       * from the source and guided by the entry dst. Oof.
       */
      nir_deref_path entry_src_path;
      nir_deref_path_init(&entry_src_path, entry->src.deref, state->mem_ctx);
      value->deref = specialize_wildcards(b, &entry_src_path,
                                          &entry_dst_path, &src_path);
      nir_deref_path_finish(&entry_src_path);
   }

   /* If our source deref is longer than the entry deref, that's ok because
    * it just means the entry deref needs to be extended a bit.
    */
   while (*src_p) {
      nir_deref_instr *src_tail = *src_p++;
      value->deref = nir_build_deref_follower(b, value->deref, src_tail);
   }

   nir_deref_path_finish(&entry_dst_path);
   nir_deref_path_finish(&src_path);

   return true;
}

static bool
try_load_from_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
                    nir_builder *b, nir_intrinsic_instr *intrin,
                    nir_deref_instr *src, struct value *value)
{
   if (entry == NULL)
      return false;

   if (entry->src.is_ssa) {
      return load_from_ssa_entry_value(state, entry, b, intrin, value);
   } else {
      return load_from_deref_entry_value(state, entry, b, intrin, src, value);
   }
}

static void
invalidate_copies_for_cf_node(struct copy_prop_var_state *state,
                              struct util_dynarray *copies,
                              nir_cf_node *cf_node)
{
   struct hash_entry *ht_entry = _mesa_hash_table_search(state->vars_written_map, cf_node);
   assert(ht_entry);

   struct vars_written *written = ht_entry->data;
   if (written->modes) {
      util_dynarray_foreach_reverse(copies, struct copy_entry, entry) {
         if (entry->dst->mode & written->modes)
            copy_entry_remove(copies, entry);
      }
   }

   hash_table_foreach(written->derefs, entry) {
      nir_deref_instr *deref_written = (nir_deref_instr *)entry->key;
      kill_aliases(copies, deref_written, (uintptr_t)entry->data);
   }
}

static void
copy_prop_vars_block(struct copy_prop_var_state *state,
                     nir_builder *b, nir_block *block,
                     struct util_dynarray *copies)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type == nir_instr_type_call) {
         apply_barrier_for_modes(copies, nir_var_shader_out |
                                         nir_var_global |
                                         nir_var_local |
                                         nir_var_shader_storage |
                                         nir_var_shared);
         continue;
      }

      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_barrier:
      case nir_intrinsic_memory_barrier:
         apply_barrier_for_modes(copies, nir_var_shader_out |
                                         nir_var_shader_storage |
                                         nir_var_shared);
         break;

      case nir_intrinsic_emit_vertex:
      case nir_intrinsic_emit_vertex_with_counter:
         apply_barrier_for_modes(copies, nir_var_shader_out);
         break;

      case nir_intrinsic_load_deref: {
         nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);

         struct copy_entry *src_entry =
            lookup_entry_for_deref(copies, src, nir_derefs_a_contains_b_bit);
         struct value value;
         if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
            if (value.is_ssa) {
               /* try_load_from_entry has already ensured that we get a single
                * SSA value that has all of the channels. We just have to do
                * the rewrite operation.
                */
               if (intrin->instr.block) {
                  /* The lookup left our instruction in-place. This means it
                   * must have used it to vec up a bunch of different sources.
                   * We need to be careful when rewriting uses so we don't
                   * rewrite the vecN itself.
                   */
                  nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
                                                 nir_src_for_ssa(value.ssa[0]),
                                                 value.ssa[0]->parent_instr);
               } else {
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                           nir_src_for_ssa(value.ssa[0]));
               }
            } else {
               /* We're turning it into a load of a different variable */
               intrin->src[0] = nir_src_for_ssa(&value.deref->dest.ssa);

               /* Put it back in again. */
               nir_builder_instr_insert(b, instr);

               value.is_ssa = true;
               for (unsigned i = 0; i < intrin->num_components; i++)
                  value.ssa[i] = &intrin->dest.ssa;
            }
            state->progress = true;
         } else {
            value.is_ssa = true;
            for (unsigned i = 0; i < intrin->num_components; i++)
               value.ssa[i] = &intrin->dest.ssa;
         }

         /* Now that we have a value, we're going to store it back so that we
          * have the right value next time we come looking for it. In order
          * to do this, we need an exact match, not just something that
          * contains what we're looking for.
          */
         struct copy_entry *store_entry =
            lookup_entry_for_deref(copies, src, nir_derefs_equal_bit);
         if (!store_entry)
            store_entry = copy_entry_create(copies, src);

         /* Set up a store to this entry with the value of the load. This way
          * we can potentially remove subsequent loads. However, we use a
          * NULL instruction so we don't try and delete the load on a
          * subsequent store.
          */
         store_to_entry(state, store_entry, &value,
                        ((1 << intrin->num_components) - 1));
         break;
      }

      case nir_intrinsic_store_deref: {
         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
         struct copy_entry *entry =
            lookup_entry_for_deref(copies, dst, nir_derefs_equal_bit);
         if (entry && value_equals_store_src(&entry->src, intrin)) {
            /* If we are storing the value from a load of the same var, the
             * store is redundant so remove it.
             */
            nir_instr_remove(instr);
         } else {
            struct value value = {
               .is_ssa = true
            };

            for (unsigned i = 0; i < intrin->num_components; i++)
               value.ssa[i] = intrin->src[1].ssa;

            unsigned wrmask = nir_intrinsic_write_mask(intrin);
            struct copy_entry *entry =
               get_entry_and_kill_aliases(copies, dst, wrmask);
            store_to_entry(state, entry, &value, wrmask);
         }

         break;
      }

      case nir_intrinsic_copy_deref: {
         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
         nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);

         if (nir_compare_derefs(src, dst) & nir_derefs_equal_bit) {
            /* This is a no-op self-copy. Get rid of it */
            nir_instr_remove(instr);
            continue;
         }

         struct copy_entry *src_entry =
            lookup_entry_for_deref(copies, src, nir_derefs_a_contains_b_bit);
         struct value value;
         if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
            if (value.is_ssa) {
               nir_store_deref(b, dst, value.ssa[0], 0xf);
               intrin = nir_instr_as_intrinsic(nir_builder_last_instr(b));
            } else {
               /* If this would be a no-op self-copy, don't bother. */
               if (nir_compare_derefs(value.deref, dst) & nir_derefs_equal_bit)
                  continue;

               /* Just turn it into a copy of a different deref */
               intrin->src[1] = nir_src_for_ssa(&value.deref->dest.ssa);

               /* Put it back in again. */
               nir_builder_instr_insert(b, instr);
            }

            state->progress = true;
         } else {
            value = (struct value) {
               .is_ssa = false,
               { .deref = src },
            };
         }

         struct copy_entry *dst_entry =
            get_entry_and_kill_aliases(copies, dst, 0xf);
         store_to_entry(state, dst_entry, &value, 0xf);
         break;
      }

      default:
         break;
      }
   }
}

static void
copy_prop_vars_cf_node(struct copy_prop_var_state *state,
                       struct util_dynarray *copies,
                       nir_cf_node *cf_node)
{
   switch (cf_node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(cf_node);

      struct util_dynarray impl_copies;
      util_dynarray_init(&impl_copies, state->mem_ctx);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
         copy_prop_vars_cf_node(state, &impl_copies, cf_node);

      break;
   }

   case nir_cf_node_block: {
      nir_block *block = nir_cf_node_as_block(cf_node);
      nir_builder b;
      nir_builder_init(&b, state->impl);
      copy_prop_vars_block(state, &b, block, copies);
      break;
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(cf_node);

      /* Clone the copies for each branch of the if statement. The idea is
       * that they both see the same state of available copies, but do not
       * interfere with each other.
       */

      struct util_dynarray then_copies;
      util_dynarray_clone(&then_copies, state->mem_ctx, copies);

      struct util_dynarray else_copies;
      util_dynarray_clone(&else_copies, state->mem_ctx, copies);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
         copy_prop_vars_cf_node(state, &then_copies, cf_node);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
         copy_prop_vars_cf_node(state, &else_copies, cf_node);

      /* Both branches' copies can be ignored, since the effect of running
       * both branches was captured in the first pass that collects
       * vars_written.
       */

      invalidate_copies_for_cf_node(state, copies, cf_node);

      break;
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(cf_node);

      /* Invalidate before cloning the copies for the loop, since the loop
       * body can be executed more than once.
       */

      invalidate_copies_for_cf_node(state, copies, cf_node);

      struct util_dynarray loop_copies;
      util_dynarray_clone(&loop_copies, state->mem_ctx, copies);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
         copy_prop_vars_cf_node(state, &loop_copies, cf_node);

      break;
   }

   default:
      unreachable("Invalid CF node type");
   }
}

static bool
nir_copy_prop_vars_impl(nir_function_impl *impl)
{
   void *mem_ctx = ralloc_context(NULL);

   struct copy_prop_var_state state = {
      .impl = impl,
      .mem_ctx = mem_ctx,
      .lin_ctx = linear_zalloc_parent(mem_ctx, 0),

      .vars_written_map = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
                                                  _mesa_key_pointer_equal),
   };

   gather_vars_written(&state, NULL, &impl->cf_node);

   copy_prop_vars_cf_node(&state, NULL, &impl->cf_node);

   if (state.progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   ralloc_free(mem_ctx);
   return state.progress;
}

bool
nir_opt_copy_prop_vars(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;
      progress |= nir_copy_prop_vars_impl(function->impl);
   }

   return progress;
}
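
/* Added usage sketch (illustrative; the loop and the other pass named here
 * are not part of this file): like most NIR optimizations, this pass is
 * typically run until it stops making progress, e.g.
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       progress |= nir_opt_copy_prop_vars(shader);
 *       progress |= nir_opt_dce(shader);
 *    } while (progress);
 */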