/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */
29 #include "nir_builder.h"
/*
 * This file implements an out-of-SSA pass as described in "Revisiting
 * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
 * Boissinot et al.
 */
38 struct from_ssa_state
{
42 struct hash_table
*merge_node_table
;
47 /* Returns true if a dominates b */
49 ssa_def_dominates(nir_ssa_def
*a
, nir_ssa_def
*b
)
51 if (a
->live_index
== 0) {
52 /* SSA undefs always dominate */
54 } else if (b
->live_index
< a
->live_index
) {
56 } else if (a
->parent_instr
->block
== b
->parent_instr
->block
) {
57 return a
->live_index
<= b
->live_index
;
59 return nir_block_dominates(a
->parent_instr
->block
,
60 b
->parent_instr
->block
);
65 /* The following data structure, which I have named merge_set is a way of
66 * representing a set registers of non-interfering registers. This is
67 * based on the concept of a "dominance forest" presented in "Fast Copy
68 * Coalescing and Live-Range Identification" by Budimlic et al. but the
69 * implementation concept is taken from "Revisiting Out-of-SSA Translation
70 * for Correctness, Code Quality, and Efficiency" by Boissinot et al.
72 * Each SSA definition is associated with a merge_node and the association
73 * is represented by a combination of a hash table and the "def" parameter
74 * in the merge_node structure. The merge_set stores a linked list of
75 * merge_nodes in dominance order of the ssa definitions. (Since the
76 * liveness analysis pass indexes the SSA values in dominance order for us,
77 * this is an easy thing to keep up.) It is assumed that no pair of the
78 * nodes in a given set interfere. Merging two sets or checking for
79 * interference can be done in a single linear-time merge-sort walk of the
85 struct exec_node node
;
86 struct merge_set
*set
;
90 typedef struct merge_set
{
91 struct exec_list nodes
;
98 merge_set_dump(merge_set
*set
, FILE *fp
)
100 nir_ssa_def
*dom
[set
->size
];
103 foreach_list_typed(merge_node
, node
, node
, &set
->nodes
) {
104 while (dom_idx
>= 0 && !ssa_def_dominates(dom
[dom_idx
], node
->def
))
107 for (int i
= 0; i
<= dom_idx
; i
++)
111 fprintf(fp
, "ssa_%d /* %s */\n", node
->def
->index
, node
->def
->name
);
113 fprintf(fp
, "ssa_%d\n", node
->def
->index
);
115 dom
[++dom_idx
] = node
->def
;
121 get_merge_node(nir_ssa_def
*def
, struct from_ssa_state
*state
)
123 struct hash_entry
*entry
=
124 _mesa_hash_table_search(state
->merge_node_table
, def
);
128 merge_set
*set
= ralloc(state
->dead_ctx
, merge_set
);
129 exec_list_make_empty(&set
->nodes
);
133 merge_node
*node
= ralloc(state
->dead_ctx
, merge_node
);
136 exec_list_push_head(&set
->nodes
, &node
->node
);
138 _mesa_hash_table_insert(state
->merge_node_table
, def
, node
);
144 merge_nodes_interfere(merge_node
*a
, merge_node
*b
)
146 return nir_ssa_defs_interfere(a
->def
, b
->def
);
149 /* Merges b into a */
151 merge_merge_sets(merge_set
*a
, merge_set
*b
)
153 struct exec_node
*an
= exec_list_get_head(&a
->nodes
);
154 struct exec_node
*bn
= exec_list_get_head(&b
->nodes
);
155 while (!exec_node_is_tail_sentinel(bn
)) {
156 merge_node
*a_node
= exec_node_data(merge_node
, an
, node
);
157 merge_node
*b_node
= exec_node_data(merge_node
, bn
, node
);
159 if (exec_node_is_tail_sentinel(an
) ||
160 a_node
->def
->live_index
> b_node
->def
->live_index
) {
161 struct exec_node
*next
= bn
->next
;
162 exec_node_remove(bn
);
163 exec_node_insert_node_before(an
, bn
);
164 exec_node_data(merge_node
, bn
, node
)->set
= a
;
177 /* Checks for any interference between two merge sets
179 * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA
180 * Translation for Correctness, Code Quality, and Efficiency" by
184 merge_sets_interfere(merge_set
*a
, merge_set
*b
)
186 NIR_VLA(merge_node
*, dom
, a
->size
+ b
->size
);
189 struct exec_node
*an
= exec_list_get_head(&a
->nodes
);
190 struct exec_node
*bn
= exec_list_get_head(&b
->nodes
);
191 while (!exec_node_is_tail_sentinel(an
) ||
192 !exec_node_is_tail_sentinel(bn
)) {
195 if (exec_node_is_tail_sentinel(an
)) {
196 current
= exec_node_data(merge_node
, bn
, node
);
198 } else if (exec_node_is_tail_sentinel(bn
)) {
199 current
= exec_node_data(merge_node
, an
, node
);
202 merge_node
*a_node
= exec_node_data(merge_node
, an
, node
);
203 merge_node
*b_node
= exec_node_data(merge_node
, bn
, node
);
205 if (a_node
->def
->live_index
<= b_node
->def
->live_index
) {
214 while (dom_idx
>= 0 &&
215 !ssa_def_dominates(dom
[dom_idx
]->def
, current
->def
))
218 if (dom_idx
>= 0 && merge_nodes_interfere(current
, dom
[dom_idx
]))
221 dom
[++dom_idx
] = current
;
228 add_parallel_copy_to_end_of_block(nir_block
*block
, void *dead_ctx
)
231 bool need_end_copy
= false;
232 if (block
->successors
[0]) {
233 nir_instr
*instr
= nir_block_first_instr(block
->successors
[0]);
234 if (instr
&& instr
->type
== nir_instr_type_phi
)
235 need_end_copy
= true;
238 if (block
->successors
[1]) {
239 nir_instr
*instr
= nir_block_first_instr(block
->successors
[1]);
240 if (instr
&& instr
->type
== nir_instr_type_phi
)
241 need_end_copy
= true;
245 /* If one of our successors has at least one phi node, we need to
246 * create a parallel copy at the end of the block but before the jump
249 nir_parallel_copy_instr
*pcopy
=
250 nir_parallel_copy_instr_create(dead_ctx
);
252 nir_instr_insert(nir_after_block_before_jump(block
), &pcopy
->instr
);
258 static nir_parallel_copy_instr
*
259 get_parallel_copy_at_end_of_block(nir_block
*block
)
261 nir_instr
*last_instr
= nir_block_last_instr(block
);
262 if (last_instr
== NULL
)
265 /* The last instruction may be a jump in which case the parallel copy is
268 if (last_instr
->type
== nir_instr_type_jump
)
269 last_instr
= nir_instr_prev(last_instr
);
271 if (last_instr
&& last_instr
->type
== nir_instr_type_parallel_copy
)
272 return nir_instr_as_parallel_copy(last_instr
);
277 /** Isolate phi nodes with parallel copies
279 * In order to solve the dependency problems with the sources and
280 * destinations of phi nodes, we first isolate them by adding parallel
281 * copies to the beginnings and ends of basic blocks. For every block with
282 * phi nodes, we add a parallel copy immediately following the last phi
283 * node that copies the destinations of all of the phi nodes to new SSA
284 * values. We also add a parallel copy to the end of every block that has
285 * a successor with phi nodes that, for each phi node in each successor,
286 * copies the corresponding sorce of the phi node and adjust the phi to
287 * used the destination of the parallel copy.
289 * In SSA form, each value has exactly one definition. What this does is
290 * ensure that each value used in a phi also has exactly one use. The
291 * destinations of phis are only used by the parallel copy immediately
292 * following the phi nodes and. Thanks to the parallel copy at the end of
293 * the predecessor block, the sources of phi nodes are are the only use of
294 * that value. This allows us to immediately assign all the sources and
295 * destinations of any given phi node to the same register without worrying
296 * about interference at all. We do coalescing to get rid of the parallel
297 * copies where possible.
299 * Before this pass can be run, we have to iterate over the blocks with
300 * add_parallel_copy_to_end_of_block to ensure that the parallel copies at
301 * the ends of blocks exist. We can create the ones at the beginnings as
302 * we go, but the ones at the ends of blocks need to be created ahead of
303 * time because of potential back-edges in the CFG.
306 isolate_phi_nodes_block(nir_block
*block
, void *dead_ctx
)
308 nir_instr
*last_phi_instr
= NULL
;
309 nir_foreach_instr(instr
, block
) {
310 /* Phi nodes only ever come at the start of a block */
311 if (instr
->type
!= nir_instr_type_phi
)
314 last_phi_instr
= instr
;
317 /* If we don't have any phis, then there's nothing for us to do. */
318 if (last_phi_instr
== NULL
)
321 /* If we have phi nodes, we need to create a parallel copy at the
322 * start of this block but after the phi nodes.
324 nir_parallel_copy_instr
*block_pcopy
=
325 nir_parallel_copy_instr_create(dead_ctx
);
326 nir_instr_insert_after(last_phi_instr
, &block_pcopy
->instr
);
328 nir_foreach_instr(instr
, block
) {
329 /* Phi nodes only ever come at the start of a block */
330 if (instr
->type
!= nir_instr_type_phi
)
333 nir_phi_instr
*phi
= nir_instr_as_phi(instr
);
334 assert(phi
->dest
.is_ssa
);
335 nir_foreach_phi_src(src
, phi
) {
336 nir_parallel_copy_instr
*pcopy
=
337 get_parallel_copy_at_end_of_block(src
->pred
);
340 nir_parallel_copy_entry
*entry
= rzalloc(dead_ctx
,
341 nir_parallel_copy_entry
);
342 nir_ssa_dest_init(&pcopy
->instr
, &entry
->dest
,
343 phi
->dest
.ssa
.num_components
,
344 phi
->dest
.ssa
.bit_size
, src
->src
.ssa
->name
);
345 exec_list_push_tail(&pcopy
->entries
, &entry
->node
);
347 assert(src
->src
.is_ssa
);
348 nir_instr_rewrite_src(&pcopy
->instr
, &entry
->src
, src
->src
);
350 nir_instr_rewrite_src(&phi
->instr
, &src
->src
,
351 nir_src_for_ssa(&entry
->dest
.ssa
));
354 nir_parallel_copy_entry
*entry
= rzalloc(dead_ctx
,
355 nir_parallel_copy_entry
);
356 nir_ssa_dest_init(&block_pcopy
->instr
, &entry
->dest
,
357 phi
->dest
.ssa
.num_components
, phi
->dest
.ssa
.bit_size
,
359 exec_list_push_tail(&block_pcopy
->entries
, &entry
->node
);
361 nir_ssa_def_rewrite_uses(&phi
->dest
.ssa
,
362 nir_src_for_ssa(&entry
->dest
.ssa
));
364 nir_instr_rewrite_src(&block_pcopy
->instr
, &entry
->src
,
365 nir_src_for_ssa(&phi
->dest
.ssa
));
372 coalesce_phi_nodes_block(nir_block
*block
, struct from_ssa_state
*state
)
374 nir_foreach_instr(instr
, block
) {
375 /* Phi nodes only ever come at the start of a block */
376 if (instr
->type
!= nir_instr_type_phi
)
379 nir_phi_instr
*phi
= nir_instr_as_phi(instr
);
381 assert(phi
->dest
.is_ssa
);
382 merge_node
*dest_node
= get_merge_node(&phi
->dest
.ssa
, state
);
384 nir_foreach_phi_src(src
, phi
) {
385 assert(src
->src
.is_ssa
);
386 merge_node
*src_node
= get_merge_node(src
->src
.ssa
, state
);
387 if (src_node
->set
!= dest_node
->set
)
388 merge_merge_sets(dest_node
->set
, src_node
->set
);
396 aggressive_coalesce_parallel_copy(nir_parallel_copy_instr
*pcopy
,
397 struct from_ssa_state
*state
)
399 nir_foreach_parallel_copy_entry(entry
, pcopy
) {
400 if (!entry
->src
.is_ssa
)
403 /* Since load_const instructions are SSA only, we can't replace their
404 * destinations with registers and, therefore, can't coalesce them.
406 if (entry
->src
.ssa
->parent_instr
->type
== nir_instr_type_load_const
)
409 /* Don't try and coalesce these */
410 if (entry
->dest
.ssa
.num_components
!= entry
->src
.ssa
->num_components
)
413 merge_node
*src_node
= get_merge_node(entry
->src
.ssa
, state
);
414 merge_node
*dest_node
= get_merge_node(&entry
->dest
.ssa
, state
);
416 if (src_node
->set
== dest_node
->set
)
419 if (!merge_sets_interfere(src_node
->set
, dest_node
->set
))
420 merge_merge_sets(src_node
->set
, dest_node
->set
);
425 aggressive_coalesce_block(nir_block
*block
, struct from_ssa_state
*state
)
427 nir_parallel_copy_instr
*start_pcopy
= NULL
;
428 nir_foreach_instr(instr
, block
) {
429 /* Phi nodes only ever come at the start of a block */
430 if (instr
->type
!= nir_instr_type_phi
) {
431 if (instr
->type
!= nir_instr_type_parallel_copy
)
432 break; /* The parallel copy must be right after the phis */
434 start_pcopy
= nir_instr_as_parallel_copy(instr
);
436 aggressive_coalesce_parallel_copy(start_pcopy
, state
);
442 nir_parallel_copy_instr
*end_pcopy
=
443 get_parallel_copy_at_end_of_block(block
);
445 if (end_pcopy
&& end_pcopy
!= start_pcopy
)
446 aggressive_coalesce_parallel_copy(end_pcopy
, state
);
451 static nir_register
*
452 create_reg_for_ssa_def(nir_ssa_def
*def
, nir_function_impl
*impl
)
454 nir_register
*reg
= nir_local_reg_create(impl
);
456 reg
->name
= def
->name
;
457 reg
->num_components
= def
->num_components
;
458 reg
->bit_size
= def
->bit_size
;
459 reg
->num_array_elems
= 0;
465 rewrite_ssa_def(nir_ssa_def
*def
, void *void_state
)
467 struct from_ssa_state
*state
= void_state
;
470 struct hash_entry
*entry
=
471 _mesa_hash_table_search(state
->merge_node_table
, def
);
473 /* In this case, we're part of a phi web. Use the web's register. */
474 merge_node
*node
= (merge_node
*)entry
->data
;
476 /* If it doesn't have a register yet, create one. Note that all of
477 * the things in the merge set should be the same so it doesn't
478 * matter which node's definition we use.
480 if (node
->set
->reg
== NULL
)
481 node
->set
->reg
= create_reg_for_ssa_def(def
, state
->builder
.impl
);
483 reg
= node
->set
->reg
;
485 if (state
->phi_webs_only
)
488 /* We leave load_const SSA values alone. They act as immediates to
489 * the backend. If it got coalesced into a phi, that's ok.
491 if (def
->parent_instr
->type
== nir_instr_type_load_const
)
494 reg
= create_reg_for_ssa_def(def
, state
->builder
.impl
);
497 nir_ssa_def_rewrite_uses(def
, nir_src_for_reg(reg
));
498 assert(list_empty(&def
->uses
) && list_empty(&def
->if_uses
));
500 if (def
->parent_instr
->type
== nir_instr_type_ssa_undef
) {
501 /* If it's an ssa_undef instruction, remove it since we know we just got
502 * rid of all its uses.
504 nir_instr
*parent_instr
= def
->parent_instr
;
505 nir_instr_remove(parent_instr
);
506 ralloc_steal(state
->dead_ctx
, parent_instr
);
507 state
->progress
= true;
511 assert(def
->parent_instr
->type
!= nir_instr_type_load_const
);
513 /* At this point we know a priori that this SSA def is part of a
514 * nir_dest. We can use exec_node_data to get the dest pointer.
516 nir_dest
*dest
= exec_node_data(nir_dest
, def
, ssa
);
518 nir_instr_rewrite_dest(state
->instr
, dest
, nir_dest_for_reg(reg
));
519 state
->progress
= true;
523 /* Resolves ssa definitions to registers. While we're at it, we also
527 resolve_registers_block(nir_block
*block
, struct from_ssa_state
*state
)
529 nir_foreach_instr_safe(instr
, block
) {
530 state
->instr
= instr
;
531 nir_foreach_ssa_def(instr
, rewrite_ssa_def
, state
);
533 if (instr
->type
== nir_instr_type_phi
) {
534 nir_instr_remove(instr
);
535 ralloc_steal(state
->dead_ctx
, instr
);
536 state
->progress
= true;
543 emit_copy(nir_builder
*b
, nir_src src
, nir_src dest_src
)
545 assert(!dest_src
.is_ssa
&&
546 dest_src
.reg
.indirect
== NULL
&&
547 dest_src
.reg
.base_offset
== 0);
550 assert(src
.ssa
->num_components
>= dest_src
.reg
.reg
->num_components
);
552 assert(src
.reg
.reg
->num_components
>= dest_src
.reg
.reg
->num_components
);
554 nir_alu_instr
*mov
= nir_alu_instr_create(b
->shader
, nir_op_imov
);
555 nir_src_copy(&mov
->src
[0].src
, &src
, mov
);
556 mov
->dest
.dest
= nir_dest_for_reg(dest_src
.reg
.reg
);
557 mov
->dest
.write_mask
= (1 << dest_src
.reg
.reg
->num_components
) - 1;
559 nir_builder_instr_insert(b
, &mov
->instr
);
562 /* Resolves a single parallel copy operation into a sequence of movs
564 * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for
565 * Correctness, Code Quality, and Efficiency" by Boissinot et al.
566 * However, I never got the algorithm to work as written, so this version
567 * is slightly modified.
569 * The algorithm works by playing this little shell game with the values.
570 * We start by recording where every source value is and which source value
571 * each destination value should receive. We then grab any copy whose
572 * destination is "empty", i.e. not used as a source, and do the following:
573 * - Find where its source value currently lives
574 * - Emit the move instruction
575 * - Set the location of the source value to the destination
576 * - Mark the location containing the source value
577 * - Mark the destination as no longer needing to be copied
579 * When we run out of "empty" destinations, we have a cycle and so we
580 * create a temporary register, copy to that register, and mark the value
581 * we copied as living in that temporary. Now, the cycle is broken, so we
582 * can continue with the above steps.
585 resolve_parallel_copy(nir_parallel_copy_instr
*pcopy
,
586 struct from_ssa_state
*state
)
588 unsigned num_copies
= 0;
589 nir_foreach_parallel_copy_entry(entry
, pcopy
) {
590 /* Sources may be SSA */
591 if (!entry
->src
.is_ssa
&& entry
->src
.reg
.reg
== entry
->dest
.reg
.reg
)
597 if (num_copies
== 0) {
598 /* Hooray, we don't need any copies! */
599 nir_instr_remove(&pcopy
->instr
);
603 /* The register/source corresponding to the given index */
604 NIR_VLA_ZERO(nir_src
, values
, num_copies
* 2);
606 /* The current location of a given piece of data. We will use -1 for "null" */
607 NIR_VLA_FILL(int, loc
, num_copies
* 2, -1);
609 /* The piece of data that the given piece of data is to be copied from. We will use -1 for "null" */
610 NIR_VLA_FILL(int, pred
, num_copies
* 2, -1);
612 /* The destinations we have yet to properly fill */
613 NIR_VLA(int, to_do
, num_copies
* 2);
616 state
->builder
.cursor
= nir_before_instr(&pcopy
->instr
);
618 /* Now we set everything up:
619 * - All values get assigned a temporary index
620 * - Current locations are set from sources
621 * - Predicessors are recorded from sources and destinations
624 nir_foreach_parallel_copy_entry(entry
, pcopy
) {
625 /* Sources may be SSA */
626 if (!entry
->src
.is_ssa
&& entry
->src
.reg
.reg
== entry
->dest
.reg
.reg
)
630 for (int i
= 0; i
< num_vals
; ++i
) {
631 if (nir_srcs_equal(values
[i
], entry
->src
))
635 src_idx
= num_vals
++;
636 values
[src_idx
] = entry
->src
;
639 nir_src dest_src
= nir_src_for_reg(entry
->dest
.reg
.reg
);
642 for (int i
= 0; i
< num_vals
; ++i
) {
643 if (nir_srcs_equal(values
[i
], dest_src
)) {
644 /* Each destination of a parallel copy instruction should be
645 * unique. A destination may get used as a source, so we still
646 * have to walk the list. However, the predecessor should not,
647 * at this point, be set yet, so we should have -1 here.
649 assert(pred
[i
] == -1);
654 dest_idx
= num_vals
++;
655 values
[dest_idx
] = dest_src
;
658 loc
[src_idx
] = src_idx
;
659 pred
[dest_idx
] = src_idx
;
661 to_do
[++to_do_idx
] = dest_idx
;
664 /* Currently empty destinations we can go ahead and fill */
665 NIR_VLA(int, ready
, num_copies
* 2);
668 /* Mark the ones that are ready for copying. We know an index is a
669 * destination if it has a predecessor and it's ready for copying if
670 * it's not marked as containing data.
672 for (int i
= 0; i
< num_vals
; i
++) {
673 if (pred
[i
] != -1 && loc
[i
] == -1)
674 ready
[++ready_idx
] = i
;
677 while (to_do_idx
>= 0) {
678 while (ready_idx
>= 0) {
679 int b
= ready
[ready_idx
--];
681 emit_copy(&state
->builder
, values
[loc
[a
]], values
[b
]);
683 /* If any other copies want a they can find it at b */
686 /* b has been filled, mark it as not needing to be copied */
689 /* If a needs to be filled, it's ready for copying now */
691 ready
[++ready_idx
] = a
;
693 int b
= to_do
[to_do_idx
--];
697 /* If we got here, then we don't have any more trivial copies that we
698 * can do. We have to break a cycle, so we create a new temporary
699 * register for that purpose. Normally, if going out of SSA after
700 * register allocation, you would want to avoid creating temporary
701 * registers. However, we are going out of SSA before register
702 * allocation, so we would rather not create extra register
703 * dependencies for the backend to deal with. If it wants, the
704 * backend can coalesce the (possibly multiple) temporaries.
706 assert(num_vals
< num_copies
* 2);
707 nir_register
*reg
= nir_local_reg_create(state
->builder
.impl
);
708 reg
->name
= "copy_temp";
709 reg
->num_array_elems
= 0;
710 if (values
[b
].is_ssa
)
711 reg
->num_components
= values
[b
].ssa
->num_components
;
713 reg
->num_components
= values
[b
].reg
.reg
->num_components
;
714 values
[num_vals
].is_ssa
= false;
715 values
[num_vals
].reg
.reg
= reg
;
717 emit_copy(&state
->builder
, values
[b
], values
[num_vals
]);
719 ready
[++ready_idx
] = b
;
723 nir_instr_remove(&pcopy
->instr
);
726 /* Resolves the parallel copies in a block. Each block can have at most
727 * two: One at the beginning, right after all the phi noces, and one at
728 * the end (or right before the final jump if it exists).
731 resolve_parallel_copies_block(nir_block
*block
, struct from_ssa_state
*state
)
733 /* At this point, we have removed all of the phi nodes. If a parallel
734 * copy existed right after the phi nodes in this block, it is now the
737 nir_instr
*first_instr
= nir_block_first_instr(block
);
738 if (first_instr
== NULL
)
739 return true; /* Empty, nothing to do. */
741 if (first_instr
->type
== nir_instr_type_parallel_copy
) {
742 nir_parallel_copy_instr
*pcopy
= nir_instr_as_parallel_copy(first_instr
);
744 resolve_parallel_copy(pcopy
, state
);
747 /* It's possible that the above code already cleaned up the end parallel
748 * copy. However, doing so removed it form the instructions list so we
749 * won't find it here. Therefore, it's safe to go ahead and just look
750 * for one and clean it up if it exists.
752 nir_parallel_copy_instr
*end_pcopy
=
753 get_parallel_copy_at_end_of_block(block
);
755 resolve_parallel_copy(end_pcopy
, state
);
761 nir_convert_from_ssa_impl(nir_function_impl
*impl
, bool phi_webs_only
)
763 struct from_ssa_state state
;
765 nir_builder_init(&state
.builder
, impl
);
766 state
.dead_ctx
= ralloc_context(NULL
);
767 state
.phi_webs_only
= phi_webs_only
;
768 state
.merge_node_table
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
769 _mesa_key_pointer_equal
);
770 state
.progress
= false;
772 nir_foreach_block(block
, impl
) {
773 add_parallel_copy_to_end_of_block(block
, state
.dead_ctx
);
776 nir_foreach_block(block
, impl
) {
777 isolate_phi_nodes_block(block
, state
.dead_ctx
);
780 /* Mark metadata as dirty before we ask for liveness analysis */
781 nir_metadata_preserve(impl
, nir_metadata_block_index
|
782 nir_metadata_dominance
);
784 nir_metadata_require(impl
, nir_metadata_live_ssa_defs
|
785 nir_metadata_dominance
);
787 nir_foreach_block(block
, impl
) {
788 coalesce_phi_nodes_block(block
, &state
);
791 nir_foreach_block(block
, impl
) {
792 aggressive_coalesce_block(block
, &state
);
795 nir_foreach_block(block
, impl
) {
796 resolve_registers_block(block
, &state
);
799 nir_foreach_block(block
, impl
) {
800 resolve_parallel_copies_block(block
, &state
);
803 nir_metadata_preserve(impl
, nir_metadata_block_index
|
804 nir_metadata_dominance
);
806 /* Clean up dead instructions and the hash tables */
807 _mesa_hash_table_destroy(state
.merge_node_table
, NULL
);
808 ralloc_free(state
.dead_ctx
);
809 return state
.progress
;
813 nir_convert_from_ssa(nir_shader
*shader
, bool phi_webs_only
)
815 bool progress
= false;
817 nir_foreach_function(function
, shader
) {
819 progress
|= nir_convert_from_ssa_impl(function
->impl
, phi_webs_only
);
827 place_phi_read(nir_shader
*shader
, nir_register
*reg
,
828 nir_ssa_def
*def
, nir_block
*block
)
830 if (block
!= def
->parent_instr
->block
) {
831 /* Try to go up the single-successor tree */
832 bool all_single_successors
= true;
833 struct set_entry
*entry
;
834 set_foreach(block
->predecessors
, entry
) {
835 nir_block
*pred
= (nir_block
*)entry
->key
;
836 if (pred
->successors
[0] && pred
->successors
[1]) {
837 all_single_successors
= false;
842 if (all_single_successors
) {
843 /* All predecessors of this block have exactly one successor and it
844 * is this block so they must eventually lead here without
845 * intersecting each other. Place the reads in the predecessors
846 * instead of this block.
848 set_foreach(block
->predecessors
, entry
)
849 place_phi_read(shader
, reg
, def
, (nir_block
*)entry
->key
);
854 nir_alu_instr
*mov
= nir_alu_instr_create(shader
, nir_op_imov
);
855 mov
->src
[0].src
= nir_src_for_ssa(def
);
856 mov
->dest
.dest
= nir_dest_for_reg(reg
);
857 mov
->dest
.write_mask
= (1 << reg
->num_components
) - 1;
858 nir_instr_insert(nir_after_block_before_jump(block
), &mov
->instr
);
861 /** Lower all of the phi nodes in a block to imovs to and from a register
863 * This provides a very quick-and-dirty out-of-SSA pass that you can run on a
864 * single block to convert all of its phis to a register and some imovs.
865 * The code that is generated, while not optimal for actual codegen in a
866 * back-end, is easy to generate, correct, and will turn into the same set of
867 * phis after you call regs_to_ssa and do some copy propagation.
869 * The one intelligent thing this pass does is that it places the moves from
870 * the phi sources as high up the predecessor tree as possible instead of in
871 * the exact predecessor. This means that, in particular, it will crawl into
872 * the deepest nesting of any if-ladders. In order to ensure that doing so is
873 * safe, it stops as soon as one of the predecessors has multiple successors.
876 nir_lower_phis_to_regs_block(nir_block
*block
)
878 nir_function_impl
*impl
= nir_cf_node_get_function(&block
->cf_node
);
879 nir_shader
*shader
= impl
->function
->shader
;
881 bool progress
= false;
882 nir_foreach_instr_safe(instr
, block
) {
883 if (instr
->type
!= nir_instr_type_phi
)
886 nir_phi_instr
*phi
= nir_instr_as_phi(instr
);
887 assert(phi
->dest
.is_ssa
);
889 nir_register
*reg
= create_reg_for_ssa_def(&phi
->dest
.ssa
, impl
);
891 nir_alu_instr
*mov
= nir_alu_instr_create(shader
, nir_op_imov
);
892 mov
->src
[0].src
= nir_src_for_reg(reg
);
893 mov
->dest
.write_mask
= (1 << phi
->dest
.ssa
.num_components
) - 1;
894 nir_ssa_dest_init(&mov
->instr
, &mov
->dest
.dest
,
895 phi
->dest
.ssa
.num_components
, phi
->dest
.ssa
.bit_size
,
897 nir_instr_insert(nir_after_instr(&phi
->instr
), &mov
->instr
);
899 nir_ssa_def_rewrite_uses(&phi
->dest
.ssa
,
900 nir_src_for_ssa(&mov
->dest
.dest
.ssa
));
902 nir_foreach_phi_src(src
, phi
) {
903 assert(src
->src
.is_ssa
);
904 /* We don't want derefs ending up in phi sources */
905 assert(!nir_src_as_deref(src
->src
));
906 place_phi_read(shader
, reg
, src
->src
.ssa
, src
->pred
);
909 nir_instr_remove(&phi
->instr
);
917 struct ssa_def_to_reg_state
{
918 nir_function_impl
*impl
;
923 dest_replace_ssa_with_reg(nir_dest
*dest
, void *void_state
)
925 struct ssa_def_to_reg_state
*state
= void_state
;
930 nir_register
*reg
= create_reg_for_ssa_def(&dest
->ssa
, state
->impl
);
932 nir_ssa_def_rewrite_uses(&dest
->ssa
, nir_src_for_reg(reg
));
934 nir_instr
*instr
= dest
->ssa
.parent_instr
;
935 *dest
= nir_dest_for_reg(reg
);
936 dest
->reg
.parent_instr
= instr
;
937 list_addtail(&dest
->reg
.def_link
, ®
->defs
);
939 state
->progress
= true;
944 /** Lower all of the SSA defs in a block to registers
946 * This performs the very simple operation of blindly replacing all of the SSA
947 * defs in the given block with registers. If not used carefully, this may
948 * result in phi nodes with register sources which is technically invalid.
949 * Fortunately, the register-based into-SSA pass handles them anyway.
952 nir_lower_ssa_defs_to_regs_block(nir_block
*block
)
954 nir_function_impl
*impl
= nir_cf_node_get_function(&block
->cf_node
);
955 nir_shader
*shader
= impl
->function
->shader
;
957 struct ssa_def_to_reg_state state
= {
962 nir_foreach_instr(instr
, block
) {
963 if (instr
->type
== nir_instr_type_ssa_undef
) {
964 /* Undefs are just a read of something never written. */
965 nir_ssa_undef_instr
*undef
= nir_instr_as_ssa_undef(instr
);
966 nir_register
*reg
= create_reg_for_ssa_def(&undef
->def
, state
.impl
);
967 nir_ssa_def_rewrite_uses(&undef
->def
, nir_src_for_reg(reg
));
968 } else if (instr
->type
== nir_instr_type_load_const
) {
969 /* Constant loads are SSA-only, we need to insert a move */
970 nir_load_const_instr
*load
= nir_instr_as_load_const(instr
);
971 nir_register
*reg
= create_reg_for_ssa_def(&load
->def
, state
.impl
);
972 nir_ssa_def_rewrite_uses(&load
->def
, nir_src_for_reg(reg
));
974 nir_alu_instr
*mov
= nir_alu_instr_create(shader
, nir_op_imov
);
975 mov
->src
[0].src
= nir_src_for_ssa(&load
->def
);
976 mov
->dest
.dest
= nir_dest_for_reg(reg
);
977 mov
->dest
.write_mask
= (1 << reg
->num_components
) - 1;
978 nir_instr_insert(nir_after_instr(&load
->instr
), &mov
->instr
);
980 nir_foreach_dest(instr
, dest_replace_ssa_with_reg
, &state
);
984 return state
.progress
;