2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Connor Abbott (cwabbott0@gmail.com)
 * Implements the classic to-SSA algorithm described by Cytron et. al. in
 * "Efficiently Computing Static Single Assignment Form and the Control
 * Dependence Graph"
38 /* inserts a phi node of the form reg = phi(reg, reg, reg, ...) */
41 insert_trivial_phi(nir_register
*reg
, nir_block
*block
, void *mem_ctx
)
43 nir_phi_instr
*instr
= nir_phi_instr_create(mem_ctx
);
45 instr
->dest
.reg
.reg
= reg
;
46 struct set_entry
*entry
;
47 set_foreach(block
->predecessors
, entry
) {
48 nir_block
*pred
= (nir_block
*) entry
->key
;
50 nir_phi_src
*src
= ralloc(instr
, nir_phi_src
);
52 src
->src
.is_ssa
= false;
53 src
->src
.reg
.base_offset
= 0;
54 src
->src
.reg
.indirect
= NULL
;
55 src
->src
.reg
.reg
= reg
;
56 exec_list_push_tail(&instr
->srcs
, &src
->node
);
59 nir_instr_insert_before_block(block
, &instr
->instr
);
63 insert_phi_nodes(nir_function_impl
*impl
)
65 void *mem_ctx
= ralloc_parent(impl
);
67 unsigned *work
= calloc(impl
->num_blocks
, sizeof(unsigned));
68 unsigned *has_already
= calloc(impl
->num_blocks
, sizeof(unsigned));
71 * Since the work flags already prevent us from inserting a node that has
72 * ever been inserted into W, we don't need to use a set to represent W.
73 * Also, since no block can ever be inserted into W more than once, we know
74 * that the maximum size of W is the number of basic blocks in the
75 * function. So all we need to handle W is an array and a pointer to the
76 * next element to be inserted and the next element to be removed.
78 nir_block
**W
= malloc(impl
->num_blocks
* sizeof(nir_block
*));
79 unsigned w_start
, w_end
;
81 unsigned iter_count
= 0;
83 nir_index_blocks(impl
);
85 foreach_list_typed(nir_register
, reg
, node
, &impl
->registers
) {
86 if (reg
->num_array_elems
!= 0)
92 nir_foreach_def(reg
, dest
) {
93 nir_instr
*def
= dest
->reg
.parent_instr
;
94 if (work
[def
->block
->index
] < iter_count
)
95 W
[w_end
++] = def
->block
;
96 work
[def
->block
->index
] = iter_count
;
99 while (w_start
!= w_end
) {
100 nir_block
*cur
= W
[w_start
++];
101 struct set_entry
*entry
;
102 set_foreach(cur
->dom_frontier
, entry
) {
103 nir_block
*next
= (nir_block
*) entry
->key
;
106 * If there's more than one return statement, then the end block
107 * can be a join point for some definitions. However, there are
108 * no instructions in the end block, so nothing would use those
109 * phi nodes. Of course, we couldn't place those phi nodes
110 * anyways due to the restriction of having no instructions in the
113 if (next
== impl
->end_block
)
116 if (has_already
[next
->index
] < iter_count
) {
117 insert_trivial_phi(reg
, next
, mem_ctx
);
118 has_already
[next
->index
] = iter_count
;
119 if (work
[next
->index
] < iter_count
) {
120 work
[next
->index
] = iter_count
;
   unsigned num_defs; /** < used to add indices to debug names */

   /* instruction whose operands are currently being rewritten; NULL while an
    * if-condition (rather than an instruction) is being rewritten
    */
   nir_instr *parent_instr;
   /* the function implementation being converted to SSA */
   nir_function_impl *impl;

   /* map from SSA value -> original register */
   struct hash_table *ssa_map;
153 static nir_ssa_def
*get_ssa_src(nir_register
*reg
, rewrite_state
*state
)
155 unsigned index
= reg
->index
;
157 if (state
->states
[index
].index
== -1) {
159 * We're using an undefined register, create a new undefined SSA value
160 * to preserve the information that this source is undefined
162 nir_ssa_undef_instr
*instr
=
163 nir_ssa_undef_instr_create(state
->mem_ctx
, reg
->num_components
,
167 * We could just insert the undefined instruction before the instruction
168 * we're rewriting, but we could be rewriting a phi source in which case
169 * we can't do that, so do the next easiest thing - insert it at the
170 * beginning of the program. In the end, it doesn't really matter where
171 * the undefined instructions are because they're going to be ignored
174 nir_instr_insert_before_cf_list(&state
->impl
->body
, &instr
->instr
);
178 return state
->states
[index
].stack
[state
->states
[index
].index
];
182 rewrite_use(nir_src
*src
, void *_state
)
184 rewrite_state
*state
= (rewrite_state
*) _state
;
189 unsigned index
= src
->reg
.reg
->index
;
191 if (state
->states
[index
].stack
== NULL
)
194 nir_ssa_def
*def
= get_ssa_src(src
->reg
.reg
, state
);
195 if (state
->parent_instr
)
196 nir_instr_rewrite_src(state
->parent_instr
, src
, nir_src_for_ssa(def
));
198 nir_if_rewrite_condition(state
->parent_if
, nir_src_for_ssa(def
));
204 rewrite_def_forwards(nir_dest
*dest
, void *_state
)
206 rewrite_state
*state
= (rewrite_state
*) _state
;
211 nir_register
*reg
= dest
->reg
.reg
;
212 unsigned index
= reg
->index
;
214 if (state
->states
[index
].stack
== NULL
)
218 if (dest
->reg
.reg
->name
)
219 name
= ralloc_asprintf(state
->mem_ctx
, "%s_%u", dest
->reg
.reg
->name
,
220 state
->states
[index
].num_defs
);
222 list_del(&dest
->reg
.def_link
);
223 nir_ssa_dest_init(state
->parent_instr
, dest
, reg
->num_components
,
224 reg
->bit_size
, name
);
227 /* push our SSA destination on the stack */
228 state
->states
[index
].index
++;
229 assert(state
->states
[index
].index
< state
->states
[index
].stack_size
);
230 state
->states
[index
].stack
[state
->states
[index
].index
] = &dest
->ssa
;
231 state
->states
[index
].num_defs
++;
233 _mesa_hash_table_insert(state
->ssa_map
, &dest
->ssa
, reg
);
239 rewrite_alu_instr_forward(nir_alu_instr
*instr
, rewrite_state
*state
)
241 state
->parent_instr
= &instr
->instr
;
243 nir_foreach_src(&instr
->instr
, rewrite_use
, state
);
245 if (instr
->dest
.dest
.is_ssa
)
248 nir_register
*reg
= instr
->dest
.dest
.reg
.reg
;
249 unsigned index
= reg
->index
;
251 if (state
->states
[index
].stack
== NULL
)
254 unsigned write_mask
= instr
->dest
.write_mask
;
255 if (write_mask
!= (1 << instr
->dest
.dest
.reg
.reg
->num_components
) - 1) {
257 * Calculate the number of components the final instruction, which for
258 * per-component things is the number of output components of the
259 * instruction and non-per-component things is the number of enabled
260 * channels in the write mask.
262 unsigned num_components
;
263 if (nir_op_infos
[instr
->op
].output_size
== 0) {
264 unsigned temp
= (write_mask
& 0x5) + ((write_mask
>> 1) & 0x5);
265 num_components
= (temp
& 0x3) + ((temp
>> 2) & 0x3);
267 num_components
= nir_op_infos
[instr
->op
].output_size
;
271 if (instr
->dest
.dest
.reg
.reg
->name
)
272 name
= ralloc_asprintf(state
->mem_ctx
, "%s_%u",
273 reg
->name
, state
->states
[index
].num_defs
);
275 instr
->dest
.write_mask
= (1 << num_components
) - 1;
276 list_del(&instr
->dest
.dest
.reg
.def_link
);
277 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
.dest
, num_components
,
278 reg
->bit_size
, name
);
281 if (nir_op_infos
[instr
->op
].output_size
== 0) {
283 * When we change the output writemask, we need to change the
284 * swizzles for per-component inputs too
286 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++) {
287 if (nir_op_infos
[instr
->op
].input_sizes
[i
] != 0)
290 unsigned new_swizzle
[4] = {0, 0, 0, 0};
293 * We keep two indices:
294 * 1. The index of the original (non-SSA) component
295 * 2. The index of the post-SSA, compacted, component
297 * We need to map the swizzle component at index 1 to the swizzle
298 * component at index 2.
301 unsigned ssa_index
= 0;
302 for (unsigned index
= 0; index
< 4; index
++) {
303 if (!((write_mask
>> index
) & 1))
306 new_swizzle
[ssa_index
] = instr
->src
[i
].swizzle
[index
];
310 for (unsigned j
= 0; j
< 4; j
++)
311 instr
->src
[i
].swizzle
[j
] = new_swizzle
[j
];
316 switch (reg
->num_components
) {
317 case 2: op
= nir_op_vec2
; break;
318 case 3: op
= nir_op_vec3
; break;
319 case 4: op
= nir_op_vec4
; break;
320 default: unreachable("not reached");
323 nir_alu_instr
*vec
= nir_alu_instr_create(state
->mem_ctx
, op
);
325 vec
->dest
.dest
.reg
.reg
= reg
;
326 vec
->dest
.write_mask
= (1 << reg
->num_components
) - 1;
328 nir_ssa_def
*old_src
= get_ssa_src(reg
, state
);
329 nir_ssa_def
*new_src
= &instr
->dest
.dest
.ssa
;
331 unsigned ssa_index
= 0;
332 for (unsigned i
= 0; i
< reg
->num_components
; i
++) {
333 vec
->src
[i
].src
.is_ssa
= true;
334 if ((write_mask
>> i
) & 1) {
335 vec
->src
[i
].src
.ssa
= new_src
;
336 if (nir_op_infos
[instr
->op
].output_size
== 0)
337 vec
->src
[i
].swizzle
[0] = ssa_index
;
339 vec
->src
[i
].swizzle
[0] = i
;
342 vec
->src
[i
].src
.ssa
= old_src
;
343 vec
->src
[i
].swizzle
[0] = i
;
347 nir_instr_insert_after(&instr
->instr
, &vec
->instr
);
349 state
->parent_instr
= &vec
->instr
;
350 rewrite_def_forwards(&vec
->dest
.dest
, state
);
352 rewrite_def_forwards(&instr
->dest
.dest
, state
);
357 rewrite_phi_instr(nir_phi_instr
*instr
, rewrite_state
*state
)
359 state
->parent_instr
= &instr
->instr
;
360 rewrite_def_forwards(&instr
->dest
, state
);
364 rewrite_instr_forward(nir_instr
*instr
, rewrite_state
*state
)
366 if (instr
->type
== nir_instr_type_alu
) {
367 rewrite_alu_instr_forward(nir_instr_as_alu(instr
), state
);
371 if (instr
->type
== nir_instr_type_phi
) {
372 rewrite_phi_instr(nir_instr_as_phi(instr
), state
);
376 state
->parent_instr
= instr
;
378 nir_foreach_src(instr
, rewrite_use
, state
);
379 nir_foreach_dest(instr
, rewrite_def_forwards
, state
);
383 rewrite_phi_sources(nir_block
*block
, nir_block
*pred
, rewrite_state
*state
)
385 nir_foreach_instr(block
, instr
) {
386 if (instr
->type
!= nir_instr_type_phi
)
389 nir_phi_instr
*phi_instr
= nir_instr_as_phi(instr
);
391 state
->parent_instr
= instr
;
393 nir_foreach_phi_src(phi_instr
, src
) {
394 if (src
->pred
== pred
) {
395 rewrite_use(&src
->src
, state
);
403 rewrite_def_backwards(nir_dest
*dest
, void *_state
)
405 rewrite_state
*state
= (rewrite_state
*) _state
;
410 struct hash_entry
*entry
=
411 _mesa_hash_table_search(state
->ssa_map
, &dest
->ssa
);
416 nir_register
*reg
= (nir_register
*) entry
->data
;
417 unsigned index
= reg
->index
;
419 state
->states
[index
].index
--;
420 assert(state
->states
[index
].index
>= -1);
426 rewrite_instr_backwards(nir_instr
*instr
, rewrite_state
*state
)
428 nir_foreach_dest(instr
, rewrite_def_backwards
, state
);
432 rewrite_block(nir_block
*block
, rewrite_state
*state
)
434 /* This will skip over any instructions after the current one, which is
435 * what we want because those instructions (vector gather, conditional
436 * select) will already be in SSA form.
438 nir_foreach_instr_safe(block
, instr
) {
439 rewrite_instr_forward(instr
, state
);
442 if (block
!= state
->impl
->end_block
&&
443 !nir_cf_node_is_last(&block
->cf_node
) &&
444 nir_cf_node_next(&block
->cf_node
)->type
== nir_cf_node_if
) {
445 nir_if
*if_stmt
= nir_cf_node_as_if(nir_cf_node_next(&block
->cf_node
));
446 state
->parent_instr
= NULL
;
447 state
->parent_if
= if_stmt
;
448 rewrite_use(&if_stmt
->condition
, state
);
451 if (block
->successors
[0])
452 rewrite_phi_sources(block
->successors
[0], block
, state
);
453 if (block
->successors
[1])
454 rewrite_phi_sources(block
->successors
[1], block
, state
);
456 for (unsigned i
= 0; i
< block
->num_dom_children
; i
++)
457 rewrite_block(block
->dom_children
[i
], state
);
459 nir_foreach_instr_reverse(block
, instr
) {
460 rewrite_instr_backwards(instr
, state
);
465 remove_unused_regs(nir_function_impl
*impl
, rewrite_state
*state
)
467 foreach_list_typed_safe(nir_register
, reg
, node
, &impl
->registers
) {
468 if (state
->states
[reg
->index
].stack
!= NULL
)
469 exec_node_remove(®
->node
);
474 init_rewrite_state(nir_function_impl
*impl
, rewrite_state
*state
)
477 state
->mem_ctx
= ralloc_parent(impl
);
478 state
->ssa_map
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
479 _mesa_key_pointer_equal
);
480 state
->states
= ralloc_array(NULL
, reg_state
, impl
->reg_alloc
);
482 foreach_list_typed(nir_register
, reg
, node
, &impl
->registers
) {
483 assert(reg
->index
< impl
->reg_alloc
);
484 if (reg
->num_array_elems
> 0) {
485 state
->states
[reg
->index
].stack
= NULL
;
488 * Calculate a conservative estimate of the stack size based on the
489 * number of definitions there are. Note that this function *must* be
490 * called after phi nodes are inserted so we can count phi node
493 unsigned stack_size
= list_length(®
->defs
);
495 state
->states
[reg
->index
].stack
= ralloc_array(state
->states
,
499 state
->states
[reg
->index
].stack_size
= stack_size
;
501 state
->states
[reg
->index
].index
= -1;
502 state
->states
[reg
->index
].num_defs
= 0;
508 destroy_rewrite_state(rewrite_state
*state
)
510 _mesa_hash_table_destroy(state
->ssa_map
, NULL
);
511 ralloc_free(state
->states
);
515 nir_convert_to_ssa_impl(nir_function_impl
*impl
)
517 nir_metadata_require(impl
, nir_metadata_dominance
);
519 insert_phi_nodes(impl
);
522 init_rewrite_state(impl
, &state
);
524 rewrite_block(nir_start_block(impl
), &state
);
526 remove_unused_regs(impl
, &state
);
528 nir_metadata_preserve(impl
, nir_metadata_block_index
|
529 nir_metadata_dominance
);
531 destroy_rewrite_state(&state
);
535 nir_convert_to_ssa(nir_shader
*shader
)
537 nir_foreach_function(shader
, function
) {
539 nir_convert_to_ssa_impl(function
->impl
);