/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */
/*
 * This file implements an out-of-SSA pass as described in "Revisiting
 * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
 * Boissinot et al.
 */

#include "nir.h"
#include "nir_vla.h"
struct from_ssa_state {
   void *mem_ctx;
   void *dead_ctx;
   struct hash_table *ssa_table;
   struct hash_table *merge_node_table;
   nir_instr *instr;
   nir_function_impl *impl;
};
/* Returns true if a dominates b */
static bool
ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b)
{
   if (a->live_index == 0) {
      /* SSA undefs always dominate */
      return true;
   } else if (b->live_index < a->live_index) {
      return false;
   } else if (a->parent_instr->block == b->parent_instr->block) {
      return a->live_index <= b->live_index;
   } else {
      return nir_block_dominates(a->parent_instr->block,
                                 b->parent_instr->block);
   }
}
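
/* For illustration (hypothetical indices): an ssa_undef always has
 * live_index 0 and therefore dominates everything.  For two defs in the
 * same block, the liveness pass numbers them in instruction order, so the
 * smaller live_index dominates.  Otherwise, ssa_def_dominates falls back
 * to a block-level dominance query.
 */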
/* The following data structure, which I have named merge_set, is a way of
 * representing a set of non-interfering registers.  This is based on the
 * concept of a "dominance forest" presented in "Fast Copy Coalescing and
 * Live-Range Identification" by Budimlic et al., but the implementation
 * concept is taken from "Revisiting Out-of-SSA Translation for
 * Correctness, Code Quality, and Efficiency" by Boissinot et al.
 *
 * Each SSA definition is associated with a merge_node and the association
 * is represented by a combination of a hash table and the "def" parameter
 * in the merge_node structure.  The merge_set stores a linked list of
 * merge_nodes in dominance order of the SSA definitions.  (Since the
 * liveness analysis pass indexes the SSA values in dominance order for us,
 * this is an easy thing to keep up.)  It is assumed that no pair of the
 * nodes in a given set interferes.  Merging two sets or checking for
 * interference can be done in a single linear-time merge-sort walk of the
 * two lists.
 */
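
/* A worked example with hypothetical SSA indices: suppose a set holds
 * ssa_1, ssa_4, and ssa_7, where ssa_1 dominates the other two but ssa_4
 * and ssa_7 live in sibling blocks.  The list is kept sorted by live_index
 * (dominance order): ssa_1, ssa_4, ssa_7.  Walking the list with a stack
 * of "current dominators" rebuilds the dominance forest: push ssa_1, push
 * ssa_4, then pop ssa_4 when we reach ssa_7 because ssa_4 does not
 * dominate it.  At each step, interference only needs to be checked
 * against the node on top of the stack, which is what makes the walk
 * linear-time.
 */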
typedef struct {
   struct exec_node node;
   struct merge_set *set;
   nir_ssa_def *def;
} merge_node;

typedef struct merge_set {
   struct exec_list nodes;
   unsigned size;
   nir_register *reg;
} merge_set;
static void
merge_set_dump(merge_set *set, FILE *fp)
{
   nir_ssa_def *dom[set->size];
   int dom_idx = -1;

   foreach_list_typed(merge_node, node, node, &set->nodes) {
      while (dom_idx >= 0 && !ssa_def_dominates(dom[dom_idx], node->def))
         dom_idx--;

      /* Indent each definition by its depth in the dominance forest */
      for (int i = 0; i <= dom_idx; i++)
         fprintf(fp, "  ");

      if (node->def->name)
         fprintf(fp, "ssa_%d /* %s */\n", node->def->index, node->def->name);
      else
         fprintf(fp, "ssa_%d\n", node->def->index);

      dom[++dom_idx] = node->def;
   }
}
static merge_node *
get_merge_node(nir_ssa_def *def, struct from_ssa_state *state)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(state->merge_node_table, def);
   if (entry)
      return entry->data;

   merge_set *set = ralloc(state->dead_ctx, merge_set);
   exec_list_make_empty(&set->nodes);
   set->size = 1;
   set->reg = NULL;

   merge_node *node = ralloc(state->dead_ctx, merge_node);
   node->set = set;
   node->def = def;
   exec_list_push_head(&set->nodes, &node->node);

   _mesa_hash_table_insert(state->merge_node_table, def, node);

   return node;
}
static bool
merge_nodes_interfere(merge_node *a, merge_node *b)
{
   return nir_ssa_defs_interfere(a->def, b->def);
}
/* Merges b into a */
static merge_set *
merge_merge_sets(merge_set *a, merge_set *b)
{
   struct exec_node *an = exec_list_get_head(&a->nodes);
   struct exec_node *bn = exec_list_get_head(&b->nodes);
   while (!exec_node_is_tail_sentinel(bn)) {
      merge_node *a_node = exec_node_data(merge_node, an, node);
      merge_node *b_node = exec_node_data(merge_node, bn, node);

      if (exec_node_is_tail_sentinel(an) ||
          a_node->def->live_index > b_node->def->live_index) {
         struct exec_node *next = bn->next;
         exec_node_remove(bn);
         exec_node_insert_node_before(an, bn);
         exec_node_data(merge_node, bn, node)->set = a;
         bn = next;
      } else {
         an = an->next;
      }
   }

   a->size += b->size;
   b->size = 0;

   return a;
}
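
/* For example, with hypothetical live_index values, merge_merge_sets(a, b)
 * with a = [1, 5, 9] and b = [3, 7] performs one merge-sort step and
 * leaves a = [1, 3, 5, 7, 9], updating each moved node's set pointer to a.
 * No interference checking happens here; callers are expected to check
 * with merge_sets_interfere first.
 */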
/* Checks for any interference between two merge sets
 *
 * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA
 * Translation for Correctness, Code Quality, and Efficiency" by
 * Boissinot et al.
 */
static bool
merge_sets_interfere(merge_set *a, merge_set *b)
{
   NIR_VLA(merge_node *, dom, a->size + b->size);
   int dom_idx = -1;

   struct exec_node *an = exec_list_get_head(&a->nodes);
   struct exec_node *bn = exec_list_get_head(&b->nodes);
   while (!exec_node_is_tail_sentinel(an) ||
          !exec_node_is_tail_sentinel(bn)) {

      /* Pull the next node from whichever list has the smaller live_index
       * so that we visit the union of both sets in dominance order.
       */
      merge_node *current;
      if (exec_node_is_tail_sentinel(an)) {
         current = exec_node_data(merge_node, bn, node);
         bn = bn->next;
      } else if (exec_node_is_tail_sentinel(bn)) {
         current = exec_node_data(merge_node, an, node);
         an = an->next;
      } else {
         merge_node *a_node = exec_node_data(merge_node, an, node);
         merge_node *b_node = exec_node_data(merge_node, bn, node);

         if (a_node->def->live_index <= b_node->def->live_index) {
            current = a_node;
            an = an->next;
         } else {
            current = b_node;
            bn = bn->next;
         }
      }

      while (dom_idx >= 0 &&
             !ssa_def_dominates(dom[dom_idx]->def, current->def))
         dom_idx--;

      if (dom_idx >= 0 && merge_nodes_interfere(current, dom[dom_idx]))
         return true;

      dom[++dom_idx] = current;
   }

   return false;
}
static bool
add_parallel_copy_to_end_of_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   bool need_end_copy = false;
   if (block->successors[0]) {
      nir_instr *instr = nir_block_first_instr(block->successors[0]);
      if (instr && instr->type == nir_instr_type_phi)
         need_end_copy = true;
   }

   if (block->successors[1]) {
      nir_instr *instr = nir_block_first_instr(block->successors[1]);
      if (instr && instr->type == nir_instr_type_phi)
         need_end_copy = true;
   }

   if (need_end_copy) {
      /* If one of our successors has at least one phi node, we need to
       * create a parallel copy at the end of the block but before the jump
       * (if there is one).
       */
      nir_parallel_copy_instr *pcopy =
         nir_parallel_copy_instr_create(state->dead_ctx);

      nir_instr *last_instr = nir_block_last_instr(block);
      if (last_instr && last_instr->type == nir_instr_type_jump) {
         nir_instr_insert_before(last_instr, &pcopy->instr);
      } else {
         nir_instr_insert_after_block(block, &pcopy->instr);
      }
   }

   return true;
}
static nir_parallel_copy_instr *
get_parallel_copy_at_end_of_block(nir_block *block)
{
   nir_instr *last_instr = nir_block_last_instr(block);
   if (last_instr == NULL)
      return NULL;

   /* The last instruction may be a jump in which case the parallel copy is
    * right before it.
    */
   if (last_instr->type == nir_instr_type_jump)
      last_instr = nir_instr_prev(last_instr);

   if (last_instr && last_instr->type == nir_instr_type_parallel_copy)
      return nir_instr_as_parallel_copy(last_instr);
   else
      return NULL;
}
/** Isolate phi nodes with parallel copies
 *
 * In order to solve the dependency problems with the sources and
 * destinations of phi nodes, we first isolate them by adding parallel
 * copies to the beginnings and ends of basic blocks.  For every block with
 * phi nodes, we add a parallel copy immediately following the last phi
 * node that copies the destinations of all of the phi nodes to new SSA
 * values.  We also add a parallel copy to the end of every block that has
 * a successor with phi nodes; for each phi node in each successor, it
 * copies the corresponding source of the phi node, and we adjust the phi
 * to use the destination of the parallel copy.
 *
 * In SSA form, each value has exactly one definition.  What this does is
 * ensure that each value used in a phi also has exactly one use.  The
 * destinations of phis are only used by the parallel copy immediately
 * following the phi nodes, and, thanks to the parallel copy at the end of
 * the predecessor block, the sources of phi nodes are the only use of
 * that value.  This allows us to immediately assign all the sources and
 * destinations of any given phi node to the same register without worrying
 * about interference at all.  We do coalescing to get rid of the parallel
 * copies where possible.
 *
 * Before this pass can be run, we have to iterate over the blocks with
 * add_parallel_copy_to_end_of_block to ensure that the parallel copies at
 * the ends of blocks exist.  We can create the ones at the beginnings as
 * we go, but the ones at the ends of blocks need to be created ahead of
 * time because of potential back-edges in the CFG.
 */
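
/* A sketch of the transformation, with hypothetical value numbers and
 * pseudo-NIR notation (not real NIR syntax):
 *
 *    block_0:                         block_0:
 *       ...                              ...
 *                                        ssa_10 = pcopy ssa_2
 *    block_1:                  =>     block_1:
 *       ssa_3 = phi ssa_2, ...           ssa_3 = phi ssa_10, ...
 *       ...                              ssa_11 = pcopy ssa_3
 *                                        (all other uses of ssa_3
 *                                         become uses of ssa_11)
 *
 * Afterwards, ssa_10's only use is the phi and ssa_3's only use is the
 * pcopy, so the phi's sources and destination can share one register.
 */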
static bool
isolate_phi_nodes_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   nir_instr *last_phi_instr = NULL;
   nir_foreach_instr(block, instr) {
      /* Phi nodes only ever come at the start of a block */
      if (instr->type != nir_instr_type_phi)
         break;

      last_phi_instr = instr;
   }

   /* If we don't have any phi's, then there's nothing for us to do. */
   if (last_phi_instr == NULL)
      return true;

   /* If we have phi nodes, we need to create a parallel copy at the
    * start of this block but after the phi nodes.
    */
   nir_parallel_copy_instr *block_pcopy =
      nir_parallel_copy_instr_create(state->dead_ctx);
   nir_instr_insert_after(last_phi_instr, &block_pcopy->instr);

   nir_foreach_instr(block, instr) {
      /* Phi nodes only ever come at the start of a block */
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      assert(phi->dest.is_ssa);
      nir_foreach_phi_src(phi, src) {
         nir_parallel_copy_instr *pcopy =
            get_parallel_copy_at_end_of_block(src->pred);

         nir_parallel_copy_entry *entry = ralloc(state->dead_ctx,
                                                 nir_parallel_copy_entry);
         exec_list_push_tail(&pcopy->entries, &entry->node);

         nir_src_copy(&entry->src, &src->src, state->dead_ctx);
         _mesa_set_add(src->src.ssa->uses, &pcopy->instr);

         nir_ssa_dest_init(&pcopy->instr, &entry->dest,
                           phi->dest.ssa.num_components, src->src.ssa->name);

         struct set_entry *use_entry =
            _mesa_set_search(src->src.ssa->uses, instr);
         if (use_entry)
            /* It is possible that a phi node can use the same source twice
             * but for different basic blocks.  If that happens, use_entry
             * will be NULL because we already deleted it.  This is safe
             * because, by the time the loop is done, we will have deleted
             * all of the sources of the phi from their respective use sets
             * and moved them to the parallel copy definitions.
             */
            _mesa_set_remove(src->src.ssa->uses, use_entry);

         src->src.ssa = &entry->dest.ssa;
         _mesa_set_add(entry->dest.ssa.uses, instr);
      }

      nir_parallel_copy_entry *entry = ralloc(state->dead_ctx,
                                              nir_parallel_copy_entry);
      exec_list_push_tail(&block_pcopy->entries, &entry->node);

      nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
                        phi->dest.ssa.num_components, phi->dest.ssa.name);
      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
                               nir_src_for_ssa(&entry->dest.ssa),
                               state->mem_ctx);

      entry->src.is_ssa = true;
      entry->src.ssa = &phi->dest.ssa;
      _mesa_set_add(phi->dest.ssa.uses, &block_pcopy->instr);
   }

   return true;
}
static bool
coalesce_phi_nodes_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   nir_foreach_instr(block, instr) {
      /* Phi nodes only ever come at the start of a block */
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);

      assert(phi->dest.is_ssa);
      merge_node *dest_node = get_merge_node(&phi->dest.ssa, state);

      nir_foreach_phi_src(phi, src) {
         assert(src->src.is_ssa);
         merge_node *src_node = get_merge_node(src->src.ssa, state);
         if (src_node->set != dest_node->set)
            merge_merge_sets(dest_node->set, src_node->set);
      }
   }

   return true;
}
static void
agressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
                                 struct from_ssa_state *state)
{
   nir_foreach_parallel_copy_entry(pcopy, entry) {
      if (!entry->src.is_ssa)
         continue;

      /* Since load_const instructions are SSA only, we can't replace their
       * destinations with registers and, therefore, can't coalesce them.
       */
      if (entry->src.ssa->parent_instr->type == nir_instr_type_load_const)
         continue;

      /* Don't try and coalesce these */
      if (entry->dest.ssa.num_components != entry->src.ssa->num_components)
         continue;

      merge_node *src_node = get_merge_node(entry->src.ssa, state);
      merge_node *dest_node = get_merge_node(&entry->dest.ssa, state);

      if (src_node->set == dest_node->set)
         continue;

      if (!merge_sets_interfere(src_node->set, dest_node->set))
         merge_merge_sets(src_node->set, dest_node->set);
   }
}
static bool
agressive_coalesce_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   nir_parallel_copy_instr *start_pcopy = NULL;
   nir_foreach_instr(block, instr) {
      /* Phi nodes only ever come at the start of a block */
      if (instr->type != nir_instr_type_phi) {
         if (instr->type != nir_instr_type_parallel_copy)
            break; /* The parallel copy must be right after the phis */

         start_pcopy = nir_instr_as_parallel_copy(instr);

         agressive_coalesce_parallel_copy(start_pcopy, state);

         break;
      }
   }

   nir_parallel_copy_instr *end_pcopy =
      get_parallel_copy_at_end_of_block(block);

   if (end_pcopy && end_pcopy != start_pcopy)
      agressive_coalesce_parallel_copy(end_pcopy, state);

   return true;
}
static nir_register *
get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(state->merge_node_table, def);
   if (entry) {
      merge_node *node = (merge_node *)entry->data;

      /* If it doesn't have a register yet, create one.  Note that all of
       * the things in the merge set should be the same so it doesn't
       * matter which node's definition we use.
       */
      if (node->set->reg == NULL) {
         node->set->reg = nir_local_reg_create(state->impl);
         node->set->reg->name = def->name;
         node->set->reg->num_components = def->num_components;
         node->set->reg->num_array_elems = 0;
      }

      return node->set->reg;
   }

   entry = _mesa_hash_table_search(state->ssa_table, def);
   if (entry)
      return (nir_register *)entry->data;

   /* We leave load_const SSA values alone.  They act as immediates to
    * the backend.  If it got coalesced into a phi, that's ok.
    */
   if (def->parent_instr->type == nir_instr_type_load_const)
      return NULL;

   nir_register *reg = nir_local_reg_create(state->impl);
   reg->name = def->name;
   reg->num_components = def->num_components;
   reg->num_array_elems = 0;

   /* This register comes from an SSA definition that is not part of a
    * phi-web.  Therefore, we know it has a single unique definition
    * that dominates all of its uses; we can copy the parent_instr from
    * the SSA def safely.
    */
   if (def->parent_instr->type != nir_instr_type_ssa_undef)
      reg->parent_instr = def->parent_instr;

   _mesa_hash_table_insert(state->ssa_table, def, reg);

   return reg;
}
static bool
rewrite_ssa_src(nir_src *src, void *void_state)
{
   struct from_ssa_state *state = void_state;

   if (src->is_ssa) {
      nir_register *reg = get_register_for_ssa_def(src->ssa, state);

      if (reg == NULL) {
         assert(src->ssa->parent_instr->type == nir_instr_type_load_const);
         return true;
      }

      memset(src, 0, sizeof *src);
      src->reg.reg = reg;

      /* We don't need to remove it from the uses set because that is going
       * away.  We just need to add it to the one for the register. */
      _mesa_set_add(reg->uses, state->instr);
   }

   return true;
}
static bool
rewrite_ssa_dest(nir_dest *dest, void *void_state)
{
   struct from_ssa_state *state = void_state;

   if (dest->is_ssa) {
      nir_register *reg = get_register_for_ssa_def(&dest->ssa, state);

      if (reg == NULL) {
         assert(dest->ssa.parent_instr->type == nir_instr_type_load_const);
         return true;
      }

      _mesa_set_destroy(dest->ssa.uses, NULL);
      _mesa_set_destroy(dest->ssa.if_uses, NULL);

      memset(dest, 0, sizeof *dest);
      dest->reg.reg = reg;

      _mesa_set_add(reg->defs, state->instr);
   }

   return true;
}
/* Resolves ssa definitions to registers.  While we're at it, we also
 * remove phi nodes and ssa_undef instructions.
 */
static bool
resolve_registers_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   nir_foreach_instr_safe(block, instr) {
      state->instr = instr;
      nir_foreach_src(instr, rewrite_ssa_src, state);
      nir_foreach_dest(instr, rewrite_ssa_dest, state);

      if (instr->type == nir_instr_type_ssa_undef ||
          instr->type == nir_instr_type_phi) {
         nir_instr_remove(instr);
         ralloc_steal(state->dead_ctx, instr);
      }
   }
   state->instr = NULL;

   nir_if *following_if = nir_block_get_following_if(block);
   if (following_if && following_if->condition.is_ssa) {
      nir_register *reg = get_register_for_ssa_def(following_if->condition.ssa,
                                                   state);
      if (reg) {
         memset(&following_if->condition, 0, sizeof following_if->condition);
         following_if->condition.reg.reg = reg;

         _mesa_set_add(reg->if_uses, following_if);
      } else {
         /* FIXME: We really shouldn't hit this.  We should be doing
          * constant control flow propagation.
          */
         assert(following_if->condition.ssa->parent_instr->type ==
                nir_instr_type_load_const);
      }
   }

   return true;
}
static void
emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src,
          void *mem_ctx)
{
   assert(!dest_src.is_ssa &&
          dest_src.reg.indirect == NULL &&
          dest_src.reg.base_offset == 0);

   if (src.is_ssa)
      assert(src.ssa->num_components >= dest_src.reg.reg->num_components);
   else
      assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components);

   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
   nir_src_copy(&mov->src[0].src, &src, mem_ctx);
   mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg);
   mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1;

   nir_instr_insert_before(&pcopy->instr, &mov->instr);
}
/* Resolves a single parallel copy operation into a sequence of mov's
 *
 * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for
 * Correctness, Code Quality, and Efficiency" by Boissinot et al.
 * However, I never got the algorithm to work as written, so this version
 * is slightly modified.
 *
 * The algorithm works by playing this little shell game with the values.
 * We start by recording where every source value is and which source value
 * each destination value should receive.  We then grab any copy whose
 * destination is "empty", i.e. not used as a source, and do the following:
 *  - Find where its source value currently lives
 *  - Emit the move instruction
 *  - Set the location of the source value to the destination
 *  - Mark the destination as no longer needing to be copied
 *  - If the location that used to hold the source is itself an unfilled
 *    destination, mark it as ready to be filled
 *
 * When we run out of "empty" destinations, we have a cycle and so we
 * create a temporary register, copy to that register, and mark the value
 * we copied as living in that temporary.  Now, the cycle is broken, so we
 * can continue with the above steps.
 */
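
/* A concrete example with hypothetical registers: the parallel copy
 * { r0 = r1; r1 = r0 } (a swap) has no "empty" destination because both
 * destinations are also sources, so the cycle is broken with a temporary,
 * e.g.:
 *
 *    copy_temp = r0;
 *    r0 = r1;
 *    r1 = copy_temp;
 *
 * By contrast, { r2 = r0; r0 = r1 } needs no temporary: r2 is not used as
 * a source, so emitting r2 = r0 first frees r0 to receive r1's value.
 */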
static void
resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
                      struct from_ssa_state *state)
{
   unsigned num_copies = 0;
   nir_foreach_parallel_copy_entry(pcopy, entry) {
      /* Sources may be SSA */
      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
         continue;

      num_copies++;
   }

   if (num_copies == 0) {
      /* Hooray, we don't need any copies! */
      nir_instr_remove(&pcopy->instr);
      return;
   }

   /* The register/source corresponding to the given index */
   NIR_VLA_ZERO(nir_src, values, num_copies * 2);

   /* The current location of a given piece of data.  We will use -1 for "null" */
   NIR_VLA_FILL(int, loc, num_copies * 2, -1);

   /* The piece of data that the given piece of data is to be copied from.  We will use -1 for "null" */
   NIR_VLA_FILL(int, pred, num_copies * 2, -1);

   /* The destinations we have yet to properly fill */
   NIR_VLA(int, to_do, num_copies * 2);
   int to_do_idx = -1;

   /* Now we set everything up:
    *  - All values get assigned a temporary index
    *  - Current locations are set from sources
    *  - Predecessors are recorded from sources and destinations
    */
   int num_vals = 0;
   nir_foreach_parallel_copy_entry(pcopy, entry) {
      /* Sources may be SSA */
      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
         continue;

      int src_idx = -1;
      for (int i = 0; i < num_vals; ++i) {
         if (nir_srcs_equal(values[i], entry->src))
            src_idx = i;
      }
      if (src_idx < 0) {
         src_idx = num_vals++;
         values[src_idx] = entry->src;
      }

      nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg);

      int dest_idx = -1;
      for (int i = 0; i < num_vals; ++i) {
         if (nir_srcs_equal(values[i], dest_src)) {
            /* Each destination of a parallel copy instruction should be
             * unique.  A destination may get used as a source, so we still
             * have to walk the list.  However, the predecessor should not,
             * at this point, be set yet, so we should have -1 here.
             */
            assert(pred[i] == -1);
            dest_idx = i;
         }
      }
      if (dest_idx < 0) {
         dest_idx = num_vals++;
         values[dest_idx] = dest_src;
      }

      loc[src_idx] = src_idx;
      pred[dest_idx] = src_idx;

      to_do[++to_do_idx] = dest_idx;
   }

   /* Currently empty destinations we can go ahead and fill */
   NIR_VLA(int, ready, num_copies * 2);
   int ready_idx = -1;

   /* Mark the ones that are ready for copying.  We know an index is a
    * destination if it has a predecessor and it's ready for copying if
    * it's not marked as containing data.
    */
   for (int i = 0; i < num_vals; i++) {
      if (pred[i] != -1 && loc[i] == -1)
         ready[++ready_idx] = i;
   }

   while (to_do_idx >= 0) {
      while (ready_idx >= 0) {
         int b = ready[ready_idx--];
         int a = pred[b];
         emit_copy(pcopy, values[loc[a]], values[b], state->mem_ctx);

         /* If any other copies want a they can find it at b */
         loc[a] = b;

         /* b has been filled, mark it as not needing to be copied */
         pred[b] = -1;

         /* If a needs to be filled, it's ready for copying now */
         if (pred[a] != -1)
            ready[++ready_idx] = a;
      }
      int b = to_do[to_do_idx--];
      if (pred[b] == -1)
         continue;

      /* If we got here, then we don't have any more trivial copies that we
       * can do.  We have to break a cycle, so we create a new temporary
       * register for that purpose.  Normally, if going out of SSA after
       * register allocation, you would want to avoid creating temporary
       * registers.  However, we are going out of SSA before register
       * allocation, so we would rather not create extra register
       * dependencies for the backend to deal with.  If it wants, the
       * backend can coalesce the (possibly multiple) temporaries.
       */
      assert(num_vals < num_copies * 2);
      nir_register *reg = nir_local_reg_create(state->impl);
      reg->name = "copy_temp";
      reg->num_array_elems = 0;
      if (values[b].is_ssa)
         reg->num_components = values[b].ssa->num_components;
      else
         reg->num_components = values[b].reg.reg->num_components;
      values[num_vals].is_ssa = false;
      values[num_vals].reg.reg = reg;

      /* Copy the cycle member to the temporary so its old location can be
       * overwritten, then mark it as ready to be filled.
       */
      emit_copy(pcopy, values[b], values[num_vals], state->mem_ctx);
      loc[b] = num_vals;
      ready[++ready_idx] = b;
      num_vals++;
   }

   nir_instr_remove(&pcopy->instr);
}
/* Resolves the parallel copies in a block.  Each block can have at most
 * two:  one at the beginning, right after all the phi nodes, and one at
 * the end (or right before the final jump if it exists).
 */
static bool
resolve_parallel_copies_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   /* At this point, we have removed all of the phi nodes.  If a parallel
    * copy existed right after the phi nodes in this block, it is now the
    * first instruction.
    */
   nir_instr *first_instr = nir_block_first_instr(block);
   if (first_instr == NULL)
      return true; /* Empty, nothing to do. */

   if (first_instr->type == nir_instr_type_parallel_copy) {
      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr);

      resolve_parallel_copy(pcopy, state);
   }

   /* It's possible that the above code already cleaned up the end parallel
    * copy.  However, doing so removed it from the instruction list so we
    * won't find it here.  Therefore, it's safe to go ahead and just look
    * for one and clean it up if it exists.
    */
   nir_parallel_copy_instr *end_pcopy =
      get_parallel_copy_at_end_of_block(block);
   if (end_pcopy)
      resolve_parallel_copy(end_pcopy, state);

   return true;
}
static void
nir_convert_from_ssa_impl(nir_function_impl *impl)
{
   struct from_ssa_state state;

   state.mem_ctx = ralloc_parent(impl);
   state.dead_ctx = ralloc_context(NULL);
   state.impl = impl;
   state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                    _mesa_key_pointer_equal);

   nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state);
   nir_foreach_block(impl, isolate_phi_nodes_block, &state);

   /* Mark metadata as dirty before we ask for liveness analysis */
   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);

   nir_metadata_require(impl, nir_metadata_live_variables |
                              nir_metadata_dominance);

   nir_foreach_block(impl, coalesce_phi_nodes_block, &state);
   nir_foreach_block(impl, agressive_coalesce_block, &state);

   state.ssa_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                             _mesa_key_pointer_equal);
   nir_foreach_block(impl, resolve_registers_block, &state);

   nir_foreach_block(impl, resolve_parallel_copies_block, &state);

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);

   /* Clean up dead instructions and the hash tables */
   _mesa_hash_table_destroy(state.ssa_table, NULL);
   _mesa_hash_table_destroy(state.merge_node_table, NULL);
   ralloc_free(state.dead_ctx);
}
void
nir_convert_from_ssa(nir_shader *shader)
{
   nir_foreach_overload(shader, overload) {
      if (overload->impl)
         nir_convert_from_ssa_impl(overload->impl);
   }
}