/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */

#include "nir.h"

/*
 * This file implements an out-of-SSA pass as described in "Revisiting
 * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
 * Boissinot et al.
 */

struct from_ssa_state {
   void *mem_ctx;
   void *dead_ctx;
   struct hash_table *ssa_table;
   struct hash_table *merge_node_table;
   nir_instr *instr;
   nir_function_impl *impl;
};

/* Returns true if a dominates b */
static bool
ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b)
{
   if (a->live_index == 0) {
      /* SSA undefs always dominate */
      return true;
   } else if (b->live_index < a->live_index) {
      return false;
   } else if (a->parent_instr->block == b->parent_instr->block) {
      /* Within a block, the live indices give instruction order */
      return a->live_index <= b->live_index;
   } else {
      /* Walk up the dominance tree from b's block looking for a's block */
      nir_block *block = b->parent_instr->block;
      while (block->imm_dom != NULL) {
         if (block->imm_dom == a->parent_instr->block)
            return true;
         block = block->imm_dom;
      }
      return false;
   }
}

/* The following data structure, which I have named merge_set, is a way of
 * representing a set of non-interfering registers.  This is based on the
 * concept of a "dominance forest" presented in "Fast Copy Coalescing and
 * Live-Range Identification" by Budimlic et al., but the implementation
 * concept is taken from "Revisiting Out-of-SSA Translation for Correctness,
 * Code Quality, and Efficiency" by Boissinot et al.
 *
 * Each SSA definition is associated with a merge_node, and the association
 * is represented by a combination of a hash table and the "def" field in
 * the merge_node structure.  The merge_set stores a linked list of
 * merge_nodes in dominance order of the SSA definitions.  (Since the
 * liveness analysis pass indexes the SSA values in dominance order for us,
 * this is an easy invariant to maintain.)  It is assumed that no pair of
 * nodes in a given set interferes.  Merging two sets or checking for
 * interference can be done in a single linear-time merge-sort walk of the
 * two lists.
 */
struct merge_set;

typedef struct {
   struct exec_node node;
   struct merge_set *set;
   nir_ssa_def *def;
} merge_node;

typedef struct merge_set {
   struct exec_list nodes;
   unsigned size;
   nir_register *reg;
} merge_set;
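
/* An illustrative sketch of the invariant (the SSA names here are made up):
 * if ssa_1 dominates ssa_4 and ssa_4 dominates ssa_7, a merge_set holding
 * all three keeps its list in dominance order,
 *
 *    set->nodes:  ssa_1 -> ssa_4 -> ssa_7
 *
 * with live_index increasing monotonically along the list.  This ordering
 * is what lets merge_merge_sets() and merge_sets_interfere() below get away
 * with a single merge-sort style walk instead of comparing all pairs of
 * nodes.
 */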

static void
merge_set_dump(merge_set *set, FILE *fp)
{
   nir_ssa_def *dom[set->size];
   int dom_idx = -1;

   foreach_list_typed(merge_node, node, node, &set->nodes) {
      while (dom_idx >= 0 && !ssa_def_dominates(dom[dom_idx], node->def))
         dom_idx--;

      /* Indent each definition by its depth in the dominance forest */
      for (int i = 0; i <= dom_idx; i++)
         fprintf(fp, "  ");

      if (node->def->name)
         fprintf(fp, "ssa_%d /* %s */\n", node->def->index, node->def->name);
      else
         fprintf(fp, "ssa_%d\n", node->def->index);

      dom[++dom_idx] = node->def;
   }
}
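
/* Example output of merge_set_dump above (values made up): for a set where
 * ssa_1 dominates both ssa_4 and ssa_7, but ssa_4 does not dominate ssa_7,
 * it prints
 *
 *    ssa_1
 *      ssa_4
 *      ssa_7
 *
 * with indentation showing each definition's depth in the dominance forest.
 */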

static merge_node *
get_merge_node(nir_ssa_def *def, struct from_ssa_state *state)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(state->merge_node_table, def);
   if (entry)
      return entry->data;

   /* If the definition isn't associated with a node yet, give it a
    * singleton set of its own.
    */
   merge_set *set = ralloc(state->dead_ctx, merge_set);
   exec_list_make_empty(&set->nodes);
   set->size = 1;
   set->reg = NULL;

   merge_node *node = ralloc(state->dead_ctx, merge_node);
   node->set = set;
   node->def = def;
   exec_list_push_head(&set->nodes, &node->node);

   _mesa_hash_table_insert(state->merge_node_table, def, node);

   return node;
}

static bool
merge_nodes_interfere(merge_node *a, merge_node *b)
{
   return nir_ssa_defs_interfere(a->def, b->def);
}

/* Merges b into a */
static merge_set *
merge_merge_sets(merge_set *a, merge_set *b)
{
   struct exec_node *an = exec_list_get_head(&a->nodes);
   struct exec_node *bn = exec_list_get_head(&b->nodes);
   while (!exec_node_is_tail_sentinel(bn)) {
      merge_node *a_node = exec_node_data(merge_node, an, node);
      merge_node *b_node = exec_node_data(merge_node, bn, node);

      if (exec_node_is_tail_sentinel(an) ||
          a_node->def->live_index > b_node->def->live_index) {
         struct exec_node *next = bn->next;
         exec_node_remove(bn);
         exec_node_insert_node_before(an, bn);
         exec_node_data(merge_node, bn, node)->set = a;
         bn = next;
      } else {
         an = an->next;
      }
   }

   a->size += b->size;
   b->size = 0;

   return a;
}
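
/* Sketch of the merge above (live indices made up for illustration):
 * merging a = [1, 4, 9] with b = [3, 7], where each number is a node's
 * live_index, walks both lists once and splices each b node in front of
 * the first a node with a larger live_index, producing a = [1, 3, 4, 7, 9]
 * and an empty b.  Every node moved out of b has its set pointer redirected
 * to a, so dominance order is preserved without a sort.
 */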

/* Checks for any interference between two merge sets
 *
 * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA
 * Translation for Correctness, Code Quality, and Efficiency" by
 * Boissinot et al.
 */
static bool
merge_sets_interfere(merge_set *a, merge_set *b)
{
   merge_node *dom[a->size + b->size];
   int dom_idx = -1;

   struct exec_node *an = exec_list_get_head(&a->nodes);
   struct exec_node *bn = exec_list_get_head(&b->nodes);
   while (!exec_node_is_tail_sentinel(an) ||
          !exec_node_is_tail_sentinel(bn)) {

      /* Take the next node from whichever list comes first in dominance
       * order.
       */
      merge_node *current;
      if (exec_node_is_tail_sentinel(an)) {
         current = exec_node_data(merge_node, bn, node);
         bn = bn->next;
      } else if (exec_node_is_tail_sentinel(bn)) {
         current = exec_node_data(merge_node, an, node);
         an = an->next;
      } else {
         merge_node *a_node = exec_node_data(merge_node, an, node);
         merge_node *b_node = exec_node_data(merge_node, bn, node);

         if (a_node->def->live_index <= b_node->def->live_index) {
            current = a_node;
            an = an->next;
         } else {
            current = b_node;
            bn = bn->next;
         }
      }

      /* Pop anything that doesn't dominate the current node */
      while (dom_idx >= 0 &&
             !ssa_def_dominates(dom[dom_idx]->def, current->def))
         dom_idx--;

      if (dom_idx >= 0 && merge_nodes_interfere(current, dom[dom_idx]))
         return true;

      dom[++dom_idx] = current;
   }

   return false;
}
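
/* In the walk above, dom[] acts as a stack holding the chain of dominators
 * of the current node seen so far.  Sketch (with made-up defs): visiting
 * d1, d2, d3 in dominance order where d1 dominates d2 and d2 dominates d3,
 * each def only gets interference-checked against the top of the stack,
 * its nearest dominating ancestor.  The idea from the paper is that, since
 * each input set is internally interference-free, any interference in the
 * combined set must show up between such a pair.
 */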

static nir_parallel_copy_instr *
block_get_parallel_copy_at_end(nir_block *block, void *mem_ctx)
{
   nir_instr *last_instr = nir_block_last_instr(block);

   /* First we try to find a parallel copy if it already exists.  If the
    * last instruction is a jump, it will be right before the jump;
    * otherwise, it will be the last instruction.
    */
   nir_instr *pcopy_instr;
   if (last_instr != NULL && last_instr->type == nir_instr_type_jump)
      pcopy_instr = nir_instr_prev(last_instr);
   else
      pcopy_instr = last_instr;

   if (pcopy_instr != NULL &&
       pcopy_instr->type == nir_instr_type_parallel_copy) {
      /* A parallel copy already exists. */
      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(pcopy_instr);

      /* This parallel copy may be the copy for the beginning of some
       * block, so we need to check for that before we return it.
       */
      if (pcopy->at_end)
         return pcopy;
   }

   /* At this point, we haven't found a suitable parallel copy, so we
    * have to create one.
    */
   nir_parallel_copy_instr *pcopy = nir_parallel_copy_instr_create(mem_ctx);
   pcopy->at_end = true;

   if (last_instr && last_instr->type == nir_instr_type_jump) {
      nir_instr_insert_before(last_instr, &pcopy->instr);
   } else {
      nir_instr_insert_after_block(block, &pcopy->instr);
   }

   return pcopy;
}

static bool
isolate_phi_nodes_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   nir_instr *last_phi_instr = NULL;
   nir_foreach_instr(block, instr) {
      /* Phi nodes only ever come at the start of a block */
      if (instr->type != nir_instr_type_phi)
         break;

      last_phi_instr = instr;
   }

   /* If we don't have any phis, then there's nothing for us to do. */
   if (last_phi_instr == NULL)
      return true;

   /* If we have phi nodes, we need to create a parallel copy at the
    * start of this block but after the phi nodes.
    */
   nir_parallel_copy_instr *block_pcopy =
      nir_parallel_copy_instr_create(state->dead_ctx);
   nir_instr_insert_after(last_phi_instr, &block_pcopy->instr);

   nir_foreach_instr(block, instr) {
      /* Phi nodes only ever come at the start of a block */
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      assert(phi->dest.is_ssa);
      foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
         nir_parallel_copy_instr *pcopy =
            block_get_parallel_copy_at_end(src->pred, state->dead_ctx);

         nir_parallel_copy_copy *copy = ralloc(state->dead_ctx,
                                               nir_parallel_copy_copy);
         exec_list_push_tail(&pcopy->copies, &copy->node);

         copy->src = nir_src_copy(src->src, state->dead_ctx);
         _mesa_set_add(src->src.ssa->uses,
                       _mesa_hash_pointer(&pcopy->instr), &pcopy->instr);

         copy->dest.is_ssa = true;
         nir_ssa_def_init(&pcopy->instr, &copy->dest.ssa,
                          phi->dest.ssa.num_components, src->src.ssa->name);

         struct set_entry *entry = _mesa_set_search(src->src.ssa->uses,
                                                    _mesa_hash_pointer(instr),
                                                    instr);
         if (entry) {
            /* It is possible that a phi node can use the same source twice
             * but for different basic blocks.  If that happens, entry will
             * be NULL because we already deleted it.  This is safe
             * because, by the time the loop is done, we will have deleted
             * all of the sources of the phi from their respective use sets
             * and moved them to the parallel copy definitions.
             */
            _mesa_set_remove(src->src.ssa->uses, entry);
         }

         src->src.ssa = &copy->dest.ssa;
         _mesa_set_add(copy->dest.ssa.uses, _mesa_hash_pointer(instr), instr);
      }

      nir_parallel_copy_copy *copy = ralloc(state->dead_ctx,
                                            nir_parallel_copy_copy);
      exec_list_push_tail(&block_pcopy->copies, &copy->node);

      copy->dest.is_ssa = true;
      nir_ssa_def_init(&block_pcopy->instr, &copy->dest.ssa,
                       phi->dest.ssa.num_components, phi->dest.ssa.name);

      nir_src copy_dest_src = {
         .ssa = &copy->dest.ssa,
         .is_ssa = true,
      };
      nir_ssa_def_rewrite_uses(&phi->dest.ssa, copy_dest_src, state->mem_ctx);

      copy->src.is_ssa = true;
      copy->src.ssa = &phi->dest.ssa;
      _mesa_set_add(phi->dest.ssa.uses,
                    _mesa_hash_pointer(&block_pcopy->instr),
                    &block_pcopy->instr);
   }

   return true;
}
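
/* A sketch of the isolation performed above (hypothetical IR, the ssa
 * numbers are made up):
 *
 *    before:                          after:
 *    block b0:                        block b0:
 *       ...                              ...
 *                                        pcopy: ssa_10 = ssa_2
 *    block b1:                        block b1:
 *       ssa_5 = phi(b0: ssa_2)           ssa_5 = phi(b0: ssa_10)
 *                                        pcopy: ssa_11 = ssa_5
 *       use(ssa_5)                       use(ssa_11)
 *
 * Every phi source is funneled through a parallel copy at the end of its
 * predecessor and every phi destination through a parallel copy right
 * after the phis.  After this, phi sources and destinations are only ever
 * touched by parallel copies, which makes them safe to coalesce.
 */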

static bool
coalesce_phi_nodes_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   nir_foreach_instr(block, instr) {
      /* Phi nodes only ever come at the start of a block */
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);

      assert(phi->dest.is_ssa);
      merge_node *dest_node = get_merge_node(&phi->dest.ssa, state);

      foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
         assert(src->src.is_ssa);
         merge_node *src_node = get_merge_node(src->src.ssa, state);
         if (src_node->set != dest_node->set)
            merge_merge_sets(dest_node->set, src_node->set);
      }
   }

   return true;
}

static void
agressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
                                 struct from_ssa_state *state)
{
   foreach_list_typed_safe(nir_parallel_copy_copy, copy, node, &pcopy->copies) {
      if (!copy->src.is_ssa)
         continue;

      /* Don't try to coalesce these */
      if (copy->dest.ssa.num_components != copy->src.ssa->num_components)
         continue;

      merge_node *src_node = get_merge_node(copy->src.ssa, state);
      merge_node *dest_node = get_merge_node(&copy->dest.ssa, state);

      if (src_node->set == dest_node->set)
         continue;

      if (!merge_sets_interfere(src_node->set, dest_node->set))
         merge_merge_sets(src_node->set, dest_node->set);
   }
}

static bool
agressive_coalesce_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   nir_foreach_instr(block, instr) {
      /* Phi nodes only ever come at the start of a block */
      if (instr->type != nir_instr_type_phi) {
         if (instr->type != nir_instr_type_parallel_copy)
            break; /* The parallel copy must be right after the phis */

         nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(instr);

         agressive_coalesce_parallel_copy(pcopy, state);

         break;
      }
   }

   nir_instr *last_instr = nir_block_last_instr(block);
   if (last_instr && last_instr->type == nir_instr_type_parallel_copy) {
      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(last_instr);

      agressive_coalesce_parallel_copy(pcopy, state);
   }

   return true;
}

static nir_register *
get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(state->merge_node_table, def);
   if (entry) {
      merge_node *node = (merge_node *)entry->data;

      /* If it doesn't have a register yet, create one.  Note that all of
       * the things in the merge set should be the same so it doesn't
       * matter which node's definition we use.
       */
      if (node->set->reg == NULL) {
         node->set->reg = nir_local_reg_create(state->impl);
         node->set->reg->name = def->name;
         node->set->reg->num_components = def->num_components;
         node->set->reg->num_array_elems = 0;
      }

      return node->set->reg;
   }

   entry = _mesa_hash_table_search(state->ssa_table, def);
   if (entry) {
      return (nir_register *)entry->data;
   } else {
      /* We leave load_const SSA values alone.  They act as immediates to
       * the backend.  If it got coalesced into a phi, that's ok.
       */
      if (def->parent_instr->type == nir_instr_type_load_const)
         return NULL;

      nir_register *reg = nir_local_reg_create(state->impl);
      reg->name = def->name;
      reg->num_components = def->num_components;
      reg->num_array_elems = 0;

      _mesa_hash_table_insert(state->ssa_table, def, reg);
      return reg;
   }
}

static bool
rewrite_ssa_src(nir_src *src, void *void_state)
{
   struct from_ssa_state *state = void_state;

   if (src->is_ssa) {
      nir_register *reg = get_register_for_ssa_def(src->ssa, state);

      if (reg == NULL) {
         assert(src->ssa->parent_instr->type == nir_instr_type_load_const);
         return true;
      }

      memset(src, 0, sizeof *src);
      src->reg.reg = reg;

      /* We don't need to remove it from the uses set because that is going
       * away.  We just need to add it to the one for the register. */
      _mesa_set_add(reg->uses, _mesa_hash_pointer(state->instr), state->instr);
   }

   return true;
}

static bool
rewrite_ssa_dest(nir_dest *dest, void *void_state)
{
   struct from_ssa_state *state = void_state;

   if (dest->is_ssa) {
      nir_register *reg = get_register_for_ssa_def(&dest->ssa, state);

      if (reg == NULL) {
         assert(dest->ssa.parent_instr->type == nir_instr_type_load_const);
         return true;
      }

      _mesa_set_destroy(dest->ssa.uses, NULL);
      _mesa_set_destroy(dest->ssa.if_uses, NULL);

      memset(dest, 0, sizeof *dest);
      dest->reg.reg = reg;

      _mesa_set_add(reg->defs, _mesa_hash_pointer(state->instr), state->instr);
   }

   return true;
}

/* Resolves SSA definitions to registers.  While we're at it, we also
 * remove phi nodes and ssa_undef instructions.
 */
static bool
resolve_registers_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   nir_foreach_instr_safe(block, instr) {
      state->instr = instr;
      nir_foreach_src(instr, rewrite_ssa_src, state);
      nir_foreach_dest(instr, rewrite_ssa_dest, state);

      if (instr->type == nir_instr_type_ssa_undef ||
          instr->type == nir_instr_type_phi) {
         nir_instr_remove(instr);
         ralloc_steal(state->dead_ctx, instr);
      }
   }
   state->instr = NULL;

   nir_if *following_if = nir_block_following_if(block);
   if (following_if && following_if->condition.is_ssa) {
      nir_register *reg = get_register_for_ssa_def(following_if->condition.ssa,
                                                   state);
      if (reg) {
         memset(&following_if->condition, 0, sizeof following_if->condition);
         following_if->condition.reg.reg = reg;

         _mesa_set_add(reg->if_uses, _mesa_hash_pointer(following_if),
                       following_if);
      } else {
         /* FIXME: We really shouldn't hit this.  We should be doing
          * constant control flow propagation.
          */
         assert(following_if->condition.ssa->parent_instr->type ==
                nir_instr_type_load_const);
      }
   }

   return true;
}

static void
emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src,
          void *mem_ctx)
{
   assert(!dest_src.is_ssa &&
          dest_src.reg.indirect == NULL &&
          dest_src.reg.base_offset == 0);

   nir_dest dest = {
      .reg.reg = dest_src.reg.reg,
      .reg.indirect = NULL,
      .reg.base_offset = 0,
   };

   if (src.is_ssa)
      assert(src.ssa->num_components >= dest.reg.reg->num_components);
   else
      assert(src.reg.reg->num_components >= dest.reg.reg->num_components);

   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
   mov->src[0].src = nir_src_copy(src, mem_ctx);
   mov->dest.dest = nir_dest_copy(dest, mem_ctx);
   mov->dest.write_mask = (1 << dest.reg.reg->num_components) - 1;

   nir_instr_insert_before(&pcopy->instr, &mov->instr);
}

/* Resolves a single parallel copy operation into a sequence of movs
 *
 * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for
 * Correctness, Code Quality, and Efficiency" by Boissinot et al.
 * However, I never got the algorithm to work as written, so this version
 * is slightly modified.
 *
 * The algorithm works by playing this little shell game with the values.
 * We start by recording where every source value is and which source value
 * each destination value should receive.  We then grab any copy whose
 * destination is "empty", i.e. not used as a source, and do the following:
 *  - Find where its source value currently lives
 *  - Emit the move instruction
 *  - Set the location of the source value to the destination
 *  - Mark the location containing the source value as empty
 *  - Mark the destination as no longer needing to be copied
 *
 * When we run out of "empty" destinations, we have a cycle and so we
 * create a temporary register, copy to that register, and mark the value
 * we copied as living in that temporary.  Now, the cycle is broken, so we
 * can continue with the above steps.
 */
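
/* Worked example (illustrative, with made-up registers): the parallel copy
 *
 *    r1 = r2, r2 = r1
 *
 * has no "empty" destination, since both r1 and r2 are also used as
 * sources, so the trivial-copy loop stalls immediately on the cycle.  We
 * break it by emitting copy_temp = r2 and recording that r2's value now
 * lives in copy_temp.  r2 is then safe to overwrite, so the normal path
 * resumes and emits r2 = r1 followed by r1 = copy_temp.
 */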

static void
resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
                      struct from_ssa_state *state)
{
   unsigned num_copies = 0;
   foreach_list_typed_safe(nir_parallel_copy_copy, copy, node, &pcopy->copies) {
      /* Sources may be SSA */
      if (!copy->src.is_ssa && copy->src.reg.reg == copy->dest.reg.reg)
         continue;

      num_copies++;
   }

   if (num_copies == 0) {
      /* Hooray, we don't need any copies! */
      nir_instr_remove(&pcopy->instr);
      return;
   }

   /* The register/source corresponding to the given index */
   nir_src values[num_copies * 2];
   memset(values, 0, sizeof values);

   /* The current location of a given piece of data */
   int loc[num_copies * 2];

   /* The piece of data that the given piece of data is to be copied from */
   int pred[num_copies * 2];

   /* Initialize loc and pred.  We will use -1 for "null" */
   memset(loc, -1, sizeof loc);
   memset(pred, -1, sizeof pred);

   /* The destinations we have yet to properly fill */
   int to_do[num_copies * 2];
   int to_do_idx = -1;

   /* Now we set everything up:
    *  - All values get assigned a temporary index
    *  - Current locations are set from sources
    *  - Predecessors are recorded from sources and destinations
    */
   int num_vals = 0;
   foreach_list_typed(nir_parallel_copy_copy, copy, node, &pcopy->copies) {
      /* Sources may be SSA */
      if (!copy->src.is_ssa && copy->src.reg.reg == copy->dest.reg.reg)
         continue;

      /* An index of -1 marks a value as not indexed yet. */
      int src_idx = -1;
      for (int i = 0; i < num_vals; ++i) {
         if (nir_srcs_equal(values[i], copy->src))
            src_idx = i;
      }
      if (src_idx < 0) {
         src_idx = num_vals++;
         values[src_idx] = copy->src;
      }

      nir_src dest_src = {
         .reg.reg = copy->dest.reg.reg,
         .reg.indirect = NULL,
         .reg.base_offset = 0,
      };

      int dest_idx = -1;
      for (int i = 0; i < num_vals; ++i) {
         if (nir_srcs_equal(values[i], dest_src)) {
            /* Each destination of a parallel copy instruction should be
             * unique.  A destination may get used as a source, so we still
             * have to walk the list.  However, the predecessor should not,
             * at this point, be set yet, so we should have -1 here.
             */
            assert(pred[i] == -1);
            dest_idx = i;
         }
      }
      if (dest_idx < 0) {
         dest_idx = num_vals++;
         values[dest_idx] = dest_src;
      }

      loc[src_idx] = src_idx;
      pred[dest_idx] = src_idx;

      to_do[++to_do_idx] = dest_idx;
   }

   /* Currently empty destinations we can go ahead and fill */
   int ready[num_copies * 2];
   int ready_idx = -1;

   /* Mark the ones that are ready for copying.  We know an index is a
    * destination if it has a predecessor and it's ready for copying if
    * it's not marked as containing data.
    */
   for (int i = 0; i < num_vals; i++) {
      if (pred[i] != -1 && loc[i] == -1)
         ready[++ready_idx] = i;
   }

   while (to_do_idx >= 0) {
      while (ready_idx >= 0) {
         int b = ready[ready_idx--];
         int a = pred[b];
         emit_copy(pcopy, values[loc[a]], values[b], state->mem_ctx);

         /* If any other copies want a, they can find it at b */
         loc[a] = b;

         /* b has been filled, mark it as not needing to be copied */
         pred[b] = -1;

         /* If a needs to be filled, it's ready for copying now */
         if (pred[a] != -1)
            ready[++ready_idx] = a;
      }

      int b = to_do[to_do_idx--];
      if (pred[b] == -1)
         continue;

      /* If we got here, then we don't have any more trivial copies that we
       * can do.  We have to break a cycle, so we create a new temporary
       * register for that purpose.  Normally, if going out of SSA after
       * register allocation, you would want to avoid creating temporary
       * registers.  However, we are going out of SSA before register
       * allocation, so we would rather not create extra register
       * dependencies for the backend to deal with.  If it wants, the
       * backend can coalesce the (possibly multiple) temporaries.
       */
      assert(num_vals < num_copies * 2);
      nir_register *reg = nir_local_reg_create(state->impl);
      reg->name = "copy_temp";
      reg->num_array_elems = 0;
      if (values[b].is_ssa)
         reg->num_components = values[b].ssa->num_components;
      else
         reg->num_components = values[b].reg.reg->num_components;
      values[num_vals].is_ssa = false;
      values[num_vals].reg.reg = reg;

      emit_copy(pcopy, values[b], values[num_vals], state->mem_ctx);
      loc[b] = num_vals;
      ready[++ready_idx] = b;
      num_vals++;
   }

   nir_instr_remove(&pcopy->instr);
}

/* Resolves the parallel copies in a block.  Each block can have at most
 * two: one at the beginning, right after all the phi nodes, and one at
 * the end (or right before the final jump if it exists).
 */
static bool
resolve_parallel_copies_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   /* At this point, we have removed all of the phi nodes.  If a parallel
    * copy existed right after the phi nodes in this block, it is now the
    * first instruction.
    */
   nir_instr *first_instr = nir_block_first_instr(block);
   if (first_instr == NULL)
      return true; /* Empty, nothing to do. */

   if (first_instr->type == nir_instr_type_parallel_copy) {
      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr);

      resolve_parallel_copy(pcopy, state);
   }

   nir_instr *last_instr = nir_block_last_instr(block);
   if (last_instr == NULL)
      return true; /* Now empty, nothing to do. */

   /* If the last instruction is a jump, the parallel copy will be before
    * the jump.
    */
   if (last_instr->type == nir_instr_type_jump)
      last_instr = nir_instr_prev(last_instr);

   if (last_instr && last_instr->type == nir_instr_type_parallel_copy) {
      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(last_instr);

      resolve_parallel_copy(pcopy, state);
   }

   return true;
}

static void
nir_convert_from_ssa_impl(nir_function_impl *impl)
{
   struct from_ssa_state state;

   state.mem_ctx = ralloc_parent(impl);
   state.dead_ctx = ralloc_context(NULL);
   state.impl = impl;

   state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                    _mesa_key_pointer_equal);

   nir_foreach_block(impl, isolate_phi_nodes_block, &state);

   /* Mark metadata as dirty before we ask for liveness analysis */
   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);

   nir_metadata_require(impl, nir_metadata_live_variables |
                              nir_metadata_dominance);

   nir_foreach_block(impl, coalesce_phi_nodes_block, &state);
   nir_foreach_block(impl, agressive_coalesce_block, &state);

   state.ssa_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                             _mesa_key_pointer_equal);
   nir_foreach_block(impl, resolve_registers_block, &state);

   nir_foreach_block(impl, resolve_parallel_copies_block, &state);

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);

   /* Clean up dead instructions and the hash tables */
   _mesa_hash_table_destroy(state.ssa_table, NULL);
   _mesa_hash_table_destroy(state.merge_node_table, NULL);
   ralloc_free(state.dead_ctx);
}

void
nir_convert_from_ssa(nir_shader *shader)
{
   nir_foreach_overload(shader, overload) {
      if (overload->impl)
         nir_convert_from_ssa_impl(overload->impl);
   }
}