2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Connor Abbott (cwabbott0@gmail.com)
33 * Implements the classic to-SSA algorithm described by Cytron et. al. in
34 * "Efficiently Computing Static Single Assignment Form and the Control
38 /* inserts a phi node of the form reg = phi(reg, reg, reg, ...) */
41 insert_trivial_phi(nir_register
*reg
, nir_block
*block
, void *mem_ctx
)
43 nir_phi_instr
*instr
= nir_phi_instr_create(mem_ctx
);
45 instr
->dest
.reg
.reg
= reg
;
46 struct set_entry
*entry
;
47 set_foreach(block
->predecessors
, entry
) {
48 nir_block
*pred
= (nir_block
*) entry
->key
;
50 nir_phi_src
*src
= ralloc(instr
, nir_phi_src
);
52 src
->src
.is_ssa
= false;
53 src
->src
.reg
.base_offset
= 0;
54 src
->src
.reg
.indirect
= NULL
;
55 src
->src
.reg
.reg
= reg
;
56 exec_list_push_tail(&instr
->srcs
, &src
->node
);
59 nir_instr_insert_before_block(block
, &instr
->instr
);
63 insert_phi_nodes(nir_function_impl
*impl
)
65 void *mem_ctx
= ralloc_parent(impl
);
67 unsigned *work
= calloc(impl
->num_blocks
, sizeof(unsigned));
68 unsigned *has_already
= calloc(impl
->num_blocks
, sizeof(unsigned));
71 * Since the work flags already prevent us from inserting a node that has
72 * ever been inserted into W, we don't need to use a set to represent W.
73 * Also, since no block can ever be inserted into W more than once, we know
74 * that the maximum size of W is the number of basic blocks in the
75 * function. So all we need to handle W is an array and a pointer to the
76 * next element to be inserted and the next element to be removed.
78 nir_block
**W
= malloc(impl
->num_blocks
* sizeof(nir_block
*));
79 unsigned w_start
, w_end
;
81 unsigned iter_count
= 0;
83 nir_index_blocks(impl
);
85 foreach_list_typed(nir_register
, reg
, node
, &impl
->registers
) {
86 if (reg
->num_array_elems
!= 0)
92 struct set_entry
*entry
;
93 set_foreach(reg
->defs
, entry
) {
94 nir_instr
*def
= (nir_instr
*) entry
->key
;
95 if (work
[def
->block
->index
] < iter_count
)
96 W
[w_end
++] = def
->block
;
97 work
[def
->block
->index
] = iter_count
;
100 while (w_start
!= w_end
) {
101 nir_block
*cur
= W
[w_start
++];
102 set_foreach(cur
->dom_frontier
, entry
) {
103 nir_block
*next
= (nir_block
*) entry
->key
;
106 * If there's more than one return statement, then the end block
107 * can be a join point for some definitions. However, there are
108 * no instructions in the end block, so nothing would use those
109 * phi nodes. Of course, we couldn't place those phi nodes
110 * anyways due to the restriction of having no instructions in the
113 if (next
== impl
->end_block
)
116 if (has_already
[next
->index
] < iter_count
) {
117 insert_trivial_phi(reg
, next
, mem_ctx
);
118 has_already
[next
->index
] = iter_count
;
119 if (work
[next
->index
] < iter_count
) {
120 work
[next
->index
] = iter_count
;
136 unsigned num_defs
; /** < used to add indices to debug names */
145 nir_instr
*parent_instr
;
147 nir_function_impl
*impl
;
149 /* map from SSA value -> original register */
150 struct hash_table
*ssa_map
;
153 static nir_ssa_def
*get_ssa_src(nir_register
*reg
, rewrite_state
*state
)
155 unsigned index
= reg
->index
;
157 if (state
->states
[index
].index
== -1) {
159 * We're using an undefined register, create a new undefined SSA value
160 * to preserve the information that this source is undefined
162 nir_ssa_undef_instr
*instr
=
163 nir_ssa_undef_instr_create(state
->mem_ctx
, reg
->num_components
);
166 * We could just insert the undefined instruction before the instruction
167 * we're rewriting, but we could be rewriting a phi source in which case
168 * we can't do that, so do the next easiest thing - insert it at the
169 * beginning of the program. In the end, it doesn't really matter where
170 * the undefined instructions are because they're going to be ignored
173 nir_instr_insert_before_cf_list(&state
->impl
->body
, &instr
->instr
);
177 return state
->states
[index
].stack
[state
->states
[index
].index
];
181 rewrite_use(nir_src
*src
, void *_state
)
183 rewrite_state
*state
= (rewrite_state
*) _state
;
188 unsigned index
= src
->reg
.reg
->index
;
190 if (state
->states
[index
].stack
== NULL
)
194 src
->ssa
= get_ssa_src(src
->reg
.reg
, state
);
196 if (state
->parent_instr
)
197 _mesa_set_add(src
->ssa
->uses
, state
->parent_instr
);
199 _mesa_set_add(src
->ssa
->if_uses
, state
->parent_if
);
204 rewrite_def_forwards(nir_dest
*dest
, void *_state
)
206 rewrite_state
*state
= (rewrite_state
*) _state
;
211 nir_register
*reg
= dest
->reg
.reg
;
212 unsigned index
= reg
->index
;
214 if (state
->states
[index
].stack
== NULL
)
218 if (dest
->reg
.reg
->name
)
219 name
= ralloc_asprintf(state
->mem_ctx
, "%s_%u", dest
->reg
.reg
->name
,
220 state
->states
[index
].num_defs
);
222 nir_ssa_dest_init(state
->parent_instr
, dest
, reg
->num_components
, name
);
224 /* push our SSA destination on the stack */
225 state
->states
[index
].index
++;
226 assert(state
->states
[index
].index
< state
->states
[index
].stack_size
);
227 state
->states
[index
].stack
[state
->states
[index
].index
] = &dest
->ssa
;
228 state
->states
[index
].num_defs
++;
230 _mesa_hash_table_insert(state
->ssa_map
, &dest
->ssa
, reg
);
236 rewrite_alu_instr_forward(nir_alu_instr
*instr
, rewrite_state
*state
)
238 state
->parent_instr
= &instr
->instr
;
240 nir_foreach_src(&instr
->instr
, rewrite_use
, state
);
242 if (instr
->dest
.dest
.is_ssa
)
245 nir_register
*reg
= instr
->dest
.dest
.reg
.reg
;
246 unsigned index
= reg
->index
;
248 if (state
->states
[index
].stack
== NULL
)
251 unsigned write_mask
= instr
->dest
.write_mask
;
252 if (write_mask
!= (1 << instr
->dest
.dest
.reg
.reg
->num_components
) - 1) {
254 * Calculate the number of components the final instruction, which for
255 * per-component things is the number of output components of the
256 * instruction and non-per-component things is the number of enabled
257 * channels in the write mask.
259 unsigned num_components
;
260 if (nir_op_infos
[instr
->op
].output_size
== 0) {
261 unsigned temp
= (write_mask
& 0x5) + ((write_mask
>> 1) & 0x5);
262 num_components
= (temp
& 0x3) + ((temp
>> 2) & 0x3);
264 num_components
= nir_op_infos
[instr
->op
].output_size
;
268 if (instr
->dest
.dest
.reg
.reg
->name
)
269 name
= ralloc_asprintf(state
->mem_ctx
, "%s_%u",
270 reg
->name
, state
->states
[index
].num_defs
);
272 instr
->dest
.write_mask
= (1 << num_components
) - 1;
273 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
.dest
, num_components
, name
);
275 if (nir_op_infos
[instr
->op
].output_size
== 0) {
277 * When we change the output writemask, we need to change the
278 * swizzles for per-component inputs too
280 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++) {
281 if (nir_op_infos
[instr
->op
].input_sizes
[i
] != 0)
284 unsigned new_swizzle
[4] = {0, 0, 0, 0};
287 * We keep two indices:
288 * 1. The index of the original (non-SSA) component
289 * 2. The index of the post-SSA, compacted, component
291 * We need to map the swizzle component at index 1 to the swizzle
292 * component at index 2.
295 unsigned ssa_index
= 0;
296 for (unsigned index
= 0; index
< 4; index
++) {
297 if (!((write_mask
>> index
) & 1))
300 new_swizzle
[ssa_index
] = instr
->src
[i
].swizzle
[index
];
304 for (unsigned j
= 0; j
< 4; j
++)
305 instr
->src
[i
].swizzle
[j
] = new_swizzle
[j
];
310 switch (reg
->num_components
) {
311 case 2: op
= nir_op_vec2
; break;
312 case 3: op
= nir_op_vec3
; break;
313 case 4: op
= nir_op_vec4
; break;
314 default: unreachable("not reached");
317 nir_alu_instr
*vec
= nir_alu_instr_create(state
->mem_ctx
, op
);
319 vec
->dest
.dest
.reg
.reg
= reg
;
320 vec
->dest
.write_mask
= (1 << reg
->num_components
) - 1;
322 nir_ssa_def
*old_src
= get_ssa_src(reg
, state
);
323 nir_ssa_def
*new_src
= &instr
->dest
.dest
.ssa
;
325 unsigned ssa_index
= 0;
326 for (unsigned i
= 0; i
< reg
->num_components
; i
++) {
327 vec
->src
[i
].src
.is_ssa
= true;
328 if ((write_mask
>> i
) & 1) {
329 vec
->src
[i
].src
.ssa
= new_src
;
330 if (nir_op_infos
[instr
->op
].output_size
== 0)
331 vec
->src
[i
].swizzle
[0] = ssa_index
;
333 vec
->src
[i
].swizzle
[0] = i
;
336 vec
->src
[i
].src
.ssa
= old_src
;
337 vec
->src
[i
].swizzle
[0] = i
;
341 nir_instr_insert_after(&instr
->instr
, &vec
->instr
);
343 state
->parent_instr
= &vec
->instr
;
344 rewrite_def_forwards(&vec
->dest
.dest
, state
);
346 rewrite_def_forwards(&instr
->dest
.dest
, state
);
351 rewrite_phi_instr(nir_phi_instr
*instr
, rewrite_state
*state
)
353 state
->parent_instr
= &instr
->instr
;
354 rewrite_def_forwards(&instr
->dest
, state
);
358 rewrite_instr_forward(nir_instr
*instr
, rewrite_state
*state
)
360 if (instr
->type
== nir_instr_type_alu
) {
361 rewrite_alu_instr_forward(nir_instr_as_alu(instr
), state
);
365 if (instr
->type
== nir_instr_type_phi
) {
366 rewrite_phi_instr(nir_instr_as_phi(instr
), state
);
370 state
->parent_instr
= instr
;
372 nir_foreach_src(instr
, rewrite_use
, state
);
373 nir_foreach_dest(instr
, rewrite_def_forwards
, state
);
377 rewrite_phi_sources(nir_block
*block
, nir_block
*pred
, rewrite_state
*state
)
379 nir_foreach_instr(block
, instr
) {
380 if (instr
->type
!= nir_instr_type_phi
)
383 nir_phi_instr
*phi_instr
= nir_instr_as_phi(instr
);
385 state
->parent_instr
= instr
;
387 nir_foreach_phi_src(phi_instr
, src
) {
388 if (src
->pred
== pred
) {
389 rewrite_use(&src
->src
, state
);
397 rewrite_def_backwards(nir_dest
*dest
, void *_state
)
399 rewrite_state
*state
= (rewrite_state
*) _state
;
404 struct hash_entry
*entry
=
405 _mesa_hash_table_search(state
->ssa_map
, &dest
->ssa
);
410 nir_register
*reg
= (nir_register
*) entry
->data
;
411 unsigned index
= reg
->index
;
413 state
->states
[index
].index
--;
414 assert(state
->states
[index
].index
>= -1);
420 rewrite_instr_backwards(nir_instr
*instr
, rewrite_state
*state
)
422 nir_foreach_dest(instr
, rewrite_def_backwards
, state
);
426 rewrite_block(nir_block
*block
, rewrite_state
*state
)
428 /* This will skip over any instructions after the current one, which is
429 * what we want because those instructions (vector gather, conditional
430 * select) will already be in SSA form.
432 nir_foreach_instr_safe(block
, instr
) {
433 rewrite_instr_forward(instr
, state
);
436 if (block
!= state
->impl
->end_block
&&
437 !nir_cf_node_is_last(&block
->cf_node
) &&
438 nir_cf_node_next(&block
->cf_node
)->type
== nir_cf_node_if
) {
439 nir_if
*if_stmt
= nir_cf_node_as_if(nir_cf_node_next(&block
->cf_node
));
440 state
->parent_instr
= NULL
;
441 state
->parent_if
= if_stmt
;
442 rewrite_use(&if_stmt
->condition
, state
);
445 if (block
->successors
[0])
446 rewrite_phi_sources(block
->successors
[0], block
, state
);
447 if (block
->successors
[1])
448 rewrite_phi_sources(block
->successors
[1], block
, state
);
450 for (unsigned i
= 0; i
< block
->num_dom_children
; i
++)
451 rewrite_block(block
->dom_children
[i
], state
);
453 nir_foreach_instr_reverse(block
, instr
) {
454 rewrite_instr_backwards(instr
, state
);
459 remove_unused_regs(nir_function_impl
*impl
, rewrite_state
*state
)
461 foreach_list_typed_safe(nir_register
, reg
, node
, &impl
->registers
) {
462 if (state
->states
[reg
->index
].stack
!= NULL
)
463 exec_node_remove(®
->node
);
468 init_rewrite_state(nir_function_impl
*impl
, rewrite_state
*state
)
471 state
->mem_ctx
= ralloc_parent(impl
);
472 state
->ssa_map
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
473 _mesa_key_pointer_equal
);
474 state
->states
= ralloc_array(NULL
, reg_state
, impl
->reg_alloc
);
476 foreach_list_typed(nir_register
, reg
, node
, &impl
->registers
) {
477 assert(reg
->index
< impl
->reg_alloc
);
478 if (reg
->num_array_elems
> 0) {
479 state
->states
[reg
->index
].stack
= NULL
;
482 * Calculate a conservative estimate of the stack size based on the
483 * number of definitions there are. Note that this function *must* be
484 * called after phi nodes are inserted so we can count phi node
487 unsigned stack_size
= reg
->defs
->entries
;
489 state
->states
[reg
->index
].stack
= ralloc_array(state
->states
,
493 state
->states
[reg
->index
].stack_size
= stack_size
;
495 state
->states
[reg
->index
].index
= -1;
496 state
->states
[reg
->index
].num_defs
= 0;
502 destroy_rewrite_state(rewrite_state
*state
)
504 _mesa_hash_table_destroy(state
->ssa_map
, NULL
);
505 ralloc_free(state
->states
);
509 nir_convert_to_ssa_impl(nir_function_impl
*impl
)
511 nir_metadata_require(impl
, nir_metadata_dominance
);
513 insert_phi_nodes(impl
);
516 init_rewrite_state(impl
, &state
);
518 rewrite_block(impl
->start_block
, &state
);
520 remove_unused_regs(impl
, &state
);
522 nir_metadata_preserve(impl
, nir_metadata_block_index
|
523 nir_metadata_dominance
);
525 destroy_rewrite_state(&state
);
529 nir_convert_to_ssa(nir_shader
*shader
)
531 nir_foreach_overload(shader
, overload
) {
533 nir_convert_to_ssa_impl(overload
->impl
);