2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Connor Abbott (cwabbott0@gmail.com)
33 * Implements the classic to-SSA algorithm described by Cytron et. al. in
34 * "Efficiently Computing Static Single Assignment Form and the Control
38 /* inserts a phi node of the form reg = phi(reg, reg, reg, ...) */
41 insert_trivial_phi(nir_register
*reg
, nir_block
*block
, void *mem_ctx
)
43 nir_phi_instr
*instr
= nir_phi_instr_create(mem_ctx
);
45 instr
->dest
.reg
.reg
= reg
;
46 struct set_entry
*entry
;
47 set_foreach(block
->predecessors
, entry
) {
48 nir_block
*pred
= (nir_block
*) entry
->key
;
50 nir_phi_src
*src
= ralloc(instr
, nir_phi_src
);
52 src
->src
.is_ssa
= false;
53 src
->src
.reg
.base_offset
= 0;
54 src
->src
.reg
.indirect
= NULL
;
55 src
->src
.reg
.reg
= reg
;
56 exec_list_push_tail(&instr
->srcs
, &src
->node
);
59 nir_instr_insert_before_block(block
, &instr
->instr
);
63 insert_phi_nodes(nir_function_impl
*impl
)
65 void *mem_ctx
= ralloc_parent(impl
);
67 unsigned *work
= calloc(impl
->num_blocks
, sizeof(unsigned));
68 unsigned *has_already
= calloc(impl
->num_blocks
, sizeof(unsigned));
71 * Since the work flags already prevent us from inserting a node that has
72 * ever been inserted into W, we don't need to use a set to represent W.
73 * Also, since no block can ever be inserted into W more than once, we know
74 * that the maximum size of W is the number of basic blocks in the
75 * function. So all we need to handle W is an array and a pointer to the
76 * next element to be inserted and the next element to be removed.
78 nir_block
**W
= malloc(impl
->num_blocks
* sizeof(nir_block
*));
79 unsigned w_start
, w_end
;
81 unsigned iter_count
= 0;
83 nir_index_blocks(impl
);
85 foreach_list_typed(nir_register
, reg
, node
, &impl
->registers
) {
86 if (reg
->num_array_elems
!= 0)
92 nir_foreach_def(reg
, dest
) {
93 nir_instr
*def
= dest
->reg
.parent_instr
;
94 if (work
[def
->block
->index
] < iter_count
)
95 W
[w_end
++] = def
->block
;
96 work
[def
->block
->index
] = iter_count
;
99 while (w_start
!= w_end
) {
100 nir_block
*cur
= W
[w_start
++];
101 struct set_entry
*entry
;
102 set_foreach(cur
->dom_frontier
, entry
) {
103 nir_block
*next
= (nir_block
*) entry
->key
;
106 * If there's more than one return statement, then the end block
107 * can be a join point for some definitions. However, there are
108 * no instructions in the end block, so nothing would use those
109 * phi nodes. Of course, we couldn't place those phi nodes
110 * anyways due to the restriction of having no instructions in the
113 if (next
== impl
->end_block
)
116 if (has_already
[next
->index
] < iter_count
) {
117 insert_trivial_phi(reg
, next
, mem_ctx
);
118 has_already
[next
->index
] = iter_count
;
119 if (work
[next
->index
] < iter_count
) {
120 work
[next
->index
] = iter_count
;
136 unsigned num_defs
; /** < used to add indices to debug names */
145 nir_instr
*parent_instr
;
147 nir_function_impl
*impl
;
149 /* map from SSA value -> original register */
150 struct hash_table
*ssa_map
;
153 static nir_ssa_def
*get_ssa_src(nir_register
*reg
, rewrite_state
*state
)
155 unsigned index
= reg
->index
;
157 if (state
->states
[index
].index
== -1) {
159 * We're using an undefined register, create a new undefined SSA value
160 * to preserve the information that this source is undefined
162 nir_ssa_undef_instr
*instr
=
163 nir_ssa_undef_instr_create(state
->mem_ctx
, reg
->num_components
);
166 * We could just insert the undefined instruction before the instruction
167 * we're rewriting, but we could be rewriting a phi source in which case
168 * we can't do that, so do the next easiest thing - insert it at the
169 * beginning of the program. In the end, it doesn't really matter where
170 * the undefined instructions are because they're going to be ignored
173 nir_instr_insert_before_cf_list(&state
->impl
->body
, &instr
->instr
);
177 return state
->states
[index
].stack
[state
->states
[index
].index
];
181 rewrite_use(nir_src
*src
, void *_state
)
183 rewrite_state
*state
= (rewrite_state
*) _state
;
188 unsigned index
= src
->reg
.reg
->index
;
190 if (state
->states
[index
].stack
== NULL
)
193 nir_ssa_def
*def
= get_ssa_src(src
->reg
.reg
, state
);
194 if (state
->parent_instr
)
195 nir_instr_rewrite_src(state
->parent_instr
, src
, nir_src_for_ssa(def
));
197 nir_if_rewrite_condition(state
->parent_if
, nir_src_for_ssa(def
));
203 rewrite_def_forwards(nir_dest
*dest
, void *_state
)
205 rewrite_state
*state
= (rewrite_state
*) _state
;
210 nir_register
*reg
= dest
->reg
.reg
;
211 unsigned index
= reg
->index
;
213 if (state
->states
[index
].stack
== NULL
)
217 if (dest
->reg
.reg
->name
)
218 name
= ralloc_asprintf(state
->mem_ctx
, "%s_%u", dest
->reg
.reg
->name
,
219 state
->states
[index
].num_defs
);
221 list_del(&dest
->reg
.def_link
);
222 nir_ssa_dest_init(state
->parent_instr
, dest
, reg
->num_components
,
223 reg
->bit_size
, name
);
225 /* push our SSA destination on the stack */
226 state
->states
[index
].index
++;
227 assert(state
->states
[index
].index
< state
->states
[index
].stack_size
);
228 state
->states
[index
].stack
[state
->states
[index
].index
] = &dest
->ssa
;
229 state
->states
[index
].num_defs
++;
231 _mesa_hash_table_insert(state
->ssa_map
, &dest
->ssa
, reg
);
237 rewrite_alu_instr_forward(nir_alu_instr
*instr
, rewrite_state
*state
)
239 state
->parent_instr
= &instr
->instr
;
241 nir_foreach_src(&instr
->instr
, rewrite_use
, state
);
243 if (instr
->dest
.dest
.is_ssa
)
246 nir_register
*reg
= instr
->dest
.dest
.reg
.reg
;
247 unsigned index
= reg
->index
;
249 if (state
->states
[index
].stack
== NULL
)
252 unsigned write_mask
= instr
->dest
.write_mask
;
253 if (write_mask
!= (1 << instr
->dest
.dest
.reg
.reg
->num_components
) - 1) {
255 * Calculate the number of components the final instruction, which for
256 * per-component things is the number of output components of the
257 * instruction and non-per-component things is the number of enabled
258 * channels in the write mask.
260 unsigned num_components
;
261 if (nir_op_infos
[instr
->op
].output_size
== 0) {
262 unsigned temp
= (write_mask
& 0x5) + ((write_mask
>> 1) & 0x5);
263 num_components
= (temp
& 0x3) + ((temp
>> 2) & 0x3);
265 num_components
= nir_op_infos
[instr
->op
].output_size
;
269 if (instr
->dest
.dest
.reg
.reg
->name
)
270 name
= ralloc_asprintf(state
->mem_ctx
, "%s_%u",
271 reg
->name
, state
->states
[index
].num_defs
);
273 instr
->dest
.write_mask
= (1 << num_components
) - 1;
274 list_del(&instr
->dest
.dest
.reg
.def_link
);
275 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
.dest
, num_components
,
276 reg
->bit_size
, name
);
278 if (nir_op_infos
[instr
->op
].output_size
== 0) {
280 * When we change the output writemask, we need to change the
281 * swizzles for per-component inputs too
283 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++) {
284 if (nir_op_infos
[instr
->op
].input_sizes
[i
] != 0)
287 unsigned new_swizzle
[4] = {0, 0, 0, 0};
290 * We keep two indices:
291 * 1. The index of the original (non-SSA) component
292 * 2. The index of the post-SSA, compacted, component
294 * We need to map the swizzle component at index 1 to the swizzle
295 * component at index 2.
298 unsigned ssa_index
= 0;
299 for (unsigned index
= 0; index
< 4; index
++) {
300 if (!((write_mask
>> index
) & 1))
303 new_swizzle
[ssa_index
] = instr
->src
[i
].swizzle
[index
];
307 for (unsigned j
= 0; j
< 4; j
++)
308 instr
->src
[i
].swizzle
[j
] = new_swizzle
[j
];
313 switch (reg
->num_components
) {
314 case 2: op
= nir_op_vec2
; break;
315 case 3: op
= nir_op_vec3
; break;
316 case 4: op
= nir_op_vec4
; break;
317 default: unreachable("not reached");
320 nir_alu_instr
*vec
= nir_alu_instr_create(state
->mem_ctx
, op
);
322 vec
->dest
.dest
.reg
.reg
= reg
;
323 vec
->dest
.write_mask
= (1 << reg
->num_components
) - 1;
325 nir_ssa_def
*old_src
= get_ssa_src(reg
, state
);
326 nir_ssa_def
*new_src
= &instr
->dest
.dest
.ssa
;
328 unsigned ssa_index
= 0;
329 for (unsigned i
= 0; i
< reg
->num_components
; i
++) {
330 vec
->src
[i
].src
.is_ssa
= true;
331 if ((write_mask
>> i
) & 1) {
332 vec
->src
[i
].src
.ssa
= new_src
;
333 if (nir_op_infos
[instr
->op
].output_size
== 0)
334 vec
->src
[i
].swizzle
[0] = ssa_index
;
336 vec
->src
[i
].swizzle
[0] = i
;
339 vec
->src
[i
].src
.ssa
= old_src
;
340 vec
->src
[i
].swizzle
[0] = i
;
344 nir_instr_insert_after(&instr
->instr
, &vec
->instr
);
346 state
->parent_instr
= &vec
->instr
;
347 rewrite_def_forwards(&vec
->dest
.dest
, state
);
349 rewrite_def_forwards(&instr
->dest
.dest
, state
);
354 rewrite_phi_instr(nir_phi_instr
*instr
, rewrite_state
*state
)
356 state
->parent_instr
= &instr
->instr
;
357 rewrite_def_forwards(&instr
->dest
, state
);
361 rewrite_instr_forward(nir_instr
*instr
, rewrite_state
*state
)
363 if (instr
->type
== nir_instr_type_alu
) {
364 rewrite_alu_instr_forward(nir_instr_as_alu(instr
), state
);
368 if (instr
->type
== nir_instr_type_phi
) {
369 rewrite_phi_instr(nir_instr_as_phi(instr
), state
);
373 state
->parent_instr
= instr
;
375 nir_foreach_src(instr
, rewrite_use
, state
);
376 nir_foreach_dest(instr
, rewrite_def_forwards
, state
);
380 rewrite_phi_sources(nir_block
*block
, nir_block
*pred
, rewrite_state
*state
)
382 nir_foreach_instr(block
, instr
) {
383 if (instr
->type
!= nir_instr_type_phi
)
386 nir_phi_instr
*phi_instr
= nir_instr_as_phi(instr
);
388 state
->parent_instr
= instr
;
390 nir_foreach_phi_src(phi_instr
, src
) {
391 if (src
->pred
== pred
) {
392 rewrite_use(&src
->src
, state
);
400 rewrite_def_backwards(nir_dest
*dest
, void *_state
)
402 rewrite_state
*state
= (rewrite_state
*) _state
;
407 struct hash_entry
*entry
=
408 _mesa_hash_table_search(state
->ssa_map
, &dest
->ssa
);
413 nir_register
*reg
= (nir_register
*) entry
->data
;
414 unsigned index
= reg
->index
;
416 state
->states
[index
].index
--;
417 assert(state
->states
[index
].index
>= -1);
423 rewrite_instr_backwards(nir_instr
*instr
, rewrite_state
*state
)
425 nir_foreach_dest(instr
, rewrite_def_backwards
, state
);
429 rewrite_block(nir_block
*block
, rewrite_state
*state
)
431 /* This will skip over any instructions after the current one, which is
432 * what we want because those instructions (vector gather, conditional
433 * select) will already be in SSA form.
435 nir_foreach_instr_safe(block
, instr
) {
436 rewrite_instr_forward(instr
, state
);
439 if (block
!= state
->impl
->end_block
&&
440 !nir_cf_node_is_last(&block
->cf_node
) &&
441 nir_cf_node_next(&block
->cf_node
)->type
== nir_cf_node_if
) {
442 nir_if
*if_stmt
= nir_cf_node_as_if(nir_cf_node_next(&block
->cf_node
));
443 state
->parent_instr
= NULL
;
444 state
->parent_if
= if_stmt
;
445 rewrite_use(&if_stmt
->condition
, state
);
448 if (block
->successors
[0])
449 rewrite_phi_sources(block
->successors
[0], block
, state
);
450 if (block
->successors
[1])
451 rewrite_phi_sources(block
->successors
[1], block
, state
);
453 for (unsigned i
= 0; i
< block
->num_dom_children
; i
++)
454 rewrite_block(block
->dom_children
[i
], state
);
456 nir_foreach_instr_reverse(block
, instr
) {
457 rewrite_instr_backwards(instr
, state
);
462 remove_unused_regs(nir_function_impl
*impl
, rewrite_state
*state
)
464 foreach_list_typed_safe(nir_register
, reg
, node
, &impl
->registers
) {
465 if (state
->states
[reg
->index
].stack
!= NULL
)
466 exec_node_remove(®
->node
);
471 init_rewrite_state(nir_function_impl
*impl
, rewrite_state
*state
)
474 state
->mem_ctx
= ralloc_parent(impl
);
475 state
->ssa_map
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
476 _mesa_key_pointer_equal
);
477 state
->states
= ralloc_array(NULL
, reg_state
, impl
->reg_alloc
);
479 foreach_list_typed(nir_register
, reg
, node
, &impl
->registers
) {
480 assert(reg
->index
< impl
->reg_alloc
);
481 if (reg
->num_array_elems
> 0) {
482 state
->states
[reg
->index
].stack
= NULL
;
485 * Calculate a conservative estimate of the stack size based on the
486 * number of definitions there are. Note that this function *must* be
487 * called after phi nodes are inserted so we can count phi node
490 unsigned stack_size
= list_length(®
->defs
);
492 state
->states
[reg
->index
].stack
= ralloc_array(state
->states
,
496 state
->states
[reg
->index
].stack_size
= stack_size
;
498 state
->states
[reg
->index
].index
= -1;
499 state
->states
[reg
->index
].num_defs
= 0;
505 destroy_rewrite_state(rewrite_state
*state
)
507 _mesa_hash_table_destroy(state
->ssa_map
, NULL
);
508 ralloc_free(state
->states
);
512 nir_convert_to_ssa_impl(nir_function_impl
*impl
)
514 nir_metadata_require(impl
, nir_metadata_dominance
);
516 insert_phi_nodes(impl
);
519 init_rewrite_state(impl
, &state
);
521 rewrite_block(nir_start_block(impl
), &state
);
523 remove_unused_regs(impl
, &state
);
525 nir_metadata_preserve(impl
, nir_metadata_block_index
|
526 nir_metadata_dominance
);
528 destroy_rewrite_state(&state
);
532 nir_convert_to_ssa(nir_shader
*shader
)
534 nir_foreach_function(shader
, function
) {
536 nir_convert_to_ssa_impl(function
->impl
);