2 * Copyright © 2015 Red Hat
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "nir_control_flow.h"
/* Secret Decoder Ring:
 *   clone_foo():
 *        Allocate and clone a foo.
 *   __clone_foo():
 *        Clone body of foo (i.e. parent class, embedded struct, etc.)
 */
35 /* True if we are cloning an entire shader. */
38 /* If true allows the clone operation to fall back to the original pointer
39 * if no clone pointer is found in the remap table. This allows us to
40 * clone a loop body without having to add srcs from outside the loop to
41 * the remap table. This is useful for loop unrolling.
43 bool allow_remap_fallback
;
45 /* maps orig ptr -> cloned ptr: */
46 struct hash_table
*remap_table
;
48 /* List of phi sources. */
49 struct list_head phi_srcs
;
51 /* new shader object, used as memctx for just about everything else: */
56 init_clone_state(clone_state
*state
, struct hash_table
*remap_table
,
57 bool global
, bool allow_remap_fallback
)
59 state
->global_clone
= global
;
60 state
->allow_remap_fallback
= allow_remap_fallback
;
63 state
->remap_table
= remap_table
;
65 state
->remap_table
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
66 _mesa_key_pointer_equal
);
69 list_inithead(&state
->phi_srcs
);
73 free_clone_state(clone_state
*state
)
75 _mesa_hash_table_destroy(state
->remap_table
, NULL
);
79 _lookup_ptr(clone_state
*state
, const void *ptr
, bool global
)
81 struct hash_entry
*entry
;
86 if (!state
->global_clone
&& global
)
89 entry
= _mesa_hash_table_search(state
->remap_table
, ptr
);
91 assert(state
->allow_remap_fallback
);
99 add_remap(clone_state
*state
, void *nptr
, const void *ptr
)
101 _mesa_hash_table_insert(state
->remap_table
, ptr
, nptr
);
105 remap_local(clone_state
*state
, const void *ptr
)
107 return _lookup_ptr(state
, ptr
, false);
111 remap_global(clone_state
*state
, const void *ptr
)
113 return _lookup_ptr(state
, ptr
, true);
116 static nir_register
*
117 remap_reg(clone_state
*state
, const nir_register
*reg
)
119 return _lookup_ptr(state
, reg
, reg
->is_global
);
122 static nir_variable
*
123 remap_var(clone_state
*state
, const nir_variable
*var
)
125 return _lookup_ptr(state
, var
, nir_variable_is_global(var
));
129 nir_constant_clone(const nir_constant
*c
, nir_variable
*nvar
)
131 nir_constant
*nc
= ralloc(nvar
, nir_constant
);
133 memcpy(nc
->values
, c
->values
, sizeof(nc
->values
));
134 nc
->num_elements
= c
->num_elements
;
135 nc
->elements
= ralloc_array(nvar
, nir_constant
*, c
->num_elements
);
136 for (unsigned i
= 0; i
< c
->num_elements
; i
++) {
137 nc
->elements
[i
] = nir_constant_clone(c
->elements
[i
], nvar
);
143 /* NOTE: for cloning nir_variables, bypass nir_variable_create to avoid
144 * having to deal with locals and globals separately:
147 nir_variable_clone(const nir_variable
*var
, nir_shader
*shader
)
149 nir_variable
*nvar
= rzalloc(shader
, nir_variable
);
151 nvar
->type
= var
->type
;
152 nvar
->name
= ralloc_strdup(nvar
, var
->name
);
153 nvar
->data
= var
->data
;
154 nvar
->num_state_slots
= var
->num_state_slots
;
155 nvar
->state_slots
= ralloc_array(nvar
, nir_state_slot
, var
->num_state_slots
);
156 memcpy(nvar
->state_slots
, var
->state_slots
,
157 var
->num_state_slots
* sizeof(nir_state_slot
));
158 if (var
->constant_initializer
) {
159 nvar
->constant_initializer
=
160 nir_constant_clone(var
->constant_initializer
, nvar
);
162 nvar
->interface_type
= var
->interface_type
;
167 static nir_variable
*
168 clone_variable(clone_state
*state
, const nir_variable
*var
)
170 nir_variable
*nvar
= nir_variable_clone(var
, state
->ns
);
171 add_remap(state
, nvar
, var
);
176 /* clone list of nir_variable: */
178 clone_var_list(clone_state
*state
, struct exec_list
*dst
,
179 const struct exec_list
*list
)
181 exec_list_make_empty(dst
);
182 foreach_list_typed(nir_variable
, var
, node
, list
) {
183 nir_variable
*nvar
= clone_variable(state
, var
);
184 exec_list_push_tail(dst
, &nvar
->node
);
188 /* NOTE: for cloning nir_registers, bypass nir_global/local_reg_create()
189 * to avoid having to deal with locals and globals separately:
191 static nir_register
*
192 clone_register(clone_state
*state
, const nir_register
*reg
)
194 nir_register
*nreg
= rzalloc(state
->ns
, nir_register
);
195 add_remap(state
, nreg
, reg
);
197 nreg
->num_components
= reg
->num_components
;
198 nreg
->bit_size
= reg
->bit_size
;
199 nreg
->num_array_elems
= reg
->num_array_elems
;
200 nreg
->index
= reg
->index
;
201 nreg
->name
= ralloc_strdup(nreg
, reg
->name
);
202 nreg
->is_global
= reg
->is_global
;
203 nreg
->is_packed
= reg
->is_packed
;
205 /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */
206 list_inithead(&nreg
->uses
);
207 list_inithead(&nreg
->defs
);
208 list_inithead(&nreg
->if_uses
);
213 /* clone list of nir_register: */
215 clone_reg_list(clone_state
*state
, struct exec_list
*dst
,
216 const struct exec_list
*list
)
218 exec_list_make_empty(dst
);
219 foreach_list_typed(nir_register
, reg
, node
, list
) {
220 nir_register
*nreg
= clone_register(state
, reg
);
221 exec_list_push_tail(dst
, &nreg
->node
);
226 __clone_src(clone_state
*state
, void *ninstr_or_if
,
227 nir_src
*nsrc
, const nir_src
*src
)
229 nsrc
->is_ssa
= src
->is_ssa
;
231 nsrc
->ssa
= remap_local(state
, src
->ssa
);
233 nsrc
->reg
.reg
= remap_reg(state
, src
->reg
.reg
);
234 if (src
->reg
.indirect
) {
235 nsrc
->reg
.indirect
= ralloc(ninstr_or_if
, nir_src
);
236 __clone_src(state
, ninstr_or_if
, nsrc
->reg
.indirect
, src
->reg
.indirect
);
238 nsrc
->reg
.base_offset
= src
->reg
.base_offset
;
243 __clone_dst(clone_state
*state
, nir_instr
*ninstr
,
244 nir_dest
*ndst
, const nir_dest
*dst
)
246 ndst
->is_ssa
= dst
->is_ssa
;
248 nir_ssa_dest_init(ninstr
, ndst
, dst
->ssa
.num_components
,
249 dst
->ssa
.bit_size
, dst
->ssa
.name
);
250 add_remap(state
, &ndst
->ssa
, &dst
->ssa
);
252 ndst
->reg
.reg
= remap_reg(state
, dst
->reg
.reg
);
253 if (dst
->reg
.indirect
) {
254 ndst
->reg
.indirect
= ralloc(ninstr
, nir_src
);
255 __clone_src(state
, ninstr
, ndst
->reg
.indirect
, dst
->reg
.indirect
);
257 ndst
->reg
.base_offset
= dst
->reg
.base_offset
;
261 static nir_deref
*clone_deref(clone_state
*state
, const nir_deref
*deref
,
262 nir_instr
*ninstr
, nir_deref
*parent
);
264 static nir_deref_var
*
265 clone_deref_var(clone_state
*state
, const nir_deref_var
*dvar
,
268 nir_variable
*nvar
= remap_var(state
, dvar
->var
);
269 nir_deref_var
*ndvar
= nir_deref_var_create(ninstr
, nvar
);
271 if (dvar
->deref
.child
)
272 ndvar
->deref
.child
= clone_deref(state
, dvar
->deref
.child
,
273 ninstr
, &ndvar
->deref
);
278 static nir_deref_array
*
279 clone_deref_array(clone_state
*state
, const nir_deref_array
*darr
,
280 nir_instr
*ninstr
, nir_deref
*parent
)
282 nir_deref_array
*ndarr
= nir_deref_array_create(parent
);
284 ndarr
->deref
.type
= darr
->deref
.type
;
285 if (darr
->deref
.child
)
286 ndarr
->deref
.child
= clone_deref(state
, darr
->deref
.child
,
287 ninstr
, &ndarr
->deref
);
289 ndarr
->deref_array_type
= darr
->deref_array_type
;
290 ndarr
->base_offset
= darr
->base_offset
;
291 if (ndarr
->deref_array_type
== nir_deref_array_type_indirect
)
292 __clone_src(state
, ninstr
, &ndarr
->indirect
, &darr
->indirect
);
297 static nir_deref_struct
*
298 clone_deref_struct(clone_state
*state
, const nir_deref_struct
*dstr
,
299 nir_instr
*ninstr
, nir_deref
*parent
)
301 nir_deref_struct
*ndstr
= nir_deref_struct_create(parent
, dstr
->index
);
303 ndstr
->deref
.type
= dstr
->deref
.type
;
304 if (dstr
->deref
.child
)
305 ndstr
->deref
.child
= clone_deref(state
, dstr
->deref
.child
,
306 ninstr
, &ndstr
->deref
);
312 clone_deref(clone_state
*state
, const nir_deref
*dref
,
313 nir_instr
*ninstr
, nir_deref
*parent
)
315 switch (dref
->deref_type
) {
316 case nir_deref_type_array
:
317 return &clone_deref_array(state
, nir_deref_as_array(dref
),
318 ninstr
, parent
)->deref
;
319 case nir_deref_type_struct
:
320 return &clone_deref_struct(state
, nir_deref_as_struct(dref
),
321 ninstr
, parent
)->deref
;
323 unreachable("bad deref type");
328 static nir_alu_instr
*
329 clone_alu(clone_state
*state
, const nir_alu_instr
*alu
)
331 nir_alu_instr
*nalu
= nir_alu_instr_create(state
->ns
, alu
->op
);
332 nalu
->exact
= alu
->exact
;
334 __clone_dst(state
, &nalu
->instr
, &nalu
->dest
.dest
, &alu
->dest
.dest
);
335 nalu
->dest
.saturate
= alu
->dest
.saturate
;
336 nalu
->dest
.write_mask
= alu
->dest
.write_mask
;
338 for (unsigned i
= 0; i
< nir_op_infos
[alu
->op
].num_inputs
; i
++) {
339 __clone_src(state
, &nalu
->instr
, &nalu
->src
[i
].src
, &alu
->src
[i
].src
);
340 nalu
->src
[i
].negate
= alu
->src
[i
].negate
;
341 nalu
->src
[i
].abs
= alu
->src
[i
].abs
;
342 memcpy(nalu
->src
[i
].swizzle
, alu
->src
[i
].swizzle
,
343 sizeof(nalu
->src
[i
].swizzle
));
349 static nir_deref_instr
*
350 clone_deref_instr(clone_state
*state
, const nir_deref_instr
*deref
)
352 nir_deref_instr
*nderef
=
353 nir_deref_instr_create(state
->ns
, deref
->deref_type
);
355 __clone_dst(state
, &nderef
->instr
, &nderef
->dest
, &deref
->dest
);
357 nderef
->mode
= deref
->mode
;
358 nderef
->type
= deref
->type
;
360 if (deref
->deref_type
== nir_deref_type_var
) {
361 nderef
->var
= remap_var(state
, deref
->var
);
365 __clone_src(state
, &nderef
->instr
, &nderef
->parent
, &deref
->parent
);
367 switch (deref
->deref_type
) {
368 case nir_deref_type_struct
:
369 nderef
->strct
.index
= deref
->strct
.index
;
372 case nir_deref_type_array
:
373 __clone_src(state
, &nderef
->instr
,
374 &nderef
->arr
.index
, &deref
->arr
.index
);
377 case nir_deref_type_array_wildcard
:
378 case nir_deref_type_cast
:
383 unreachable("Invalid instruction deref type");
389 static nir_intrinsic_instr
*
390 clone_intrinsic(clone_state
*state
, const nir_intrinsic_instr
*itr
)
392 nir_intrinsic_instr
*nitr
=
393 nir_intrinsic_instr_create(state
->ns
, itr
->intrinsic
);
395 unsigned num_variables
= nir_intrinsic_infos
[itr
->intrinsic
].num_variables
;
396 unsigned num_srcs
= nir_intrinsic_infos
[itr
->intrinsic
].num_srcs
;
398 if (nir_intrinsic_infos
[itr
->intrinsic
].has_dest
)
399 __clone_dst(state
, &nitr
->instr
, &nitr
->dest
, &itr
->dest
);
401 nitr
->num_components
= itr
->num_components
;
402 memcpy(nitr
->const_index
, itr
->const_index
, sizeof(nitr
->const_index
));
404 for (unsigned i
= 0; i
< num_variables
; i
++) {
405 nitr
->variables
[i
] = clone_deref_var(state
, itr
->variables
[i
],
409 for (unsigned i
= 0; i
< num_srcs
; i
++)
410 __clone_src(state
, &nitr
->instr
, &nitr
->src
[i
], &itr
->src
[i
]);
415 static nir_load_const_instr
*
416 clone_load_const(clone_state
*state
, const nir_load_const_instr
*lc
)
418 nir_load_const_instr
*nlc
=
419 nir_load_const_instr_create(state
->ns
, lc
->def
.num_components
,
422 memcpy(&nlc
->value
, &lc
->value
, sizeof(nlc
->value
));
424 add_remap(state
, &nlc
->def
, &lc
->def
);
429 static nir_ssa_undef_instr
*
430 clone_ssa_undef(clone_state
*state
, const nir_ssa_undef_instr
*sa
)
432 nir_ssa_undef_instr
*nsa
=
433 nir_ssa_undef_instr_create(state
->ns
, sa
->def
.num_components
,
436 add_remap(state
, &nsa
->def
, &sa
->def
);
441 static nir_tex_instr
*
442 clone_tex(clone_state
*state
, const nir_tex_instr
*tex
)
444 nir_tex_instr
*ntex
= nir_tex_instr_create(state
->ns
, tex
->num_srcs
);
446 ntex
->sampler_dim
= tex
->sampler_dim
;
447 ntex
->dest_type
= tex
->dest_type
;
449 __clone_dst(state
, &ntex
->instr
, &ntex
->dest
, &tex
->dest
);
450 for (unsigned i
= 0; i
< ntex
->num_srcs
; i
++) {
451 ntex
->src
[i
].src_type
= tex
->src
[i
].src_type
;
452 __clone_src(state
, &ntex
->instr
, &ntex
->src
[i
].src
, &tex
->src
[i
].src
);
454 ntex
->coord_components
= tex
->coord_components
;
455 ntex
->is_array
= tex
->is_array
;
456 ntex
->is_shadow
= tex
->is_shadow
;
457 ntex
->is_new_style_shadow
= tex
->is_new_style_shadow
;
458 ntex
->component
= tex
->component
;
460 ntex
->texture_index
= tex
->texture_index
;
462 ntex
->texture
= clone_deref_var(state
, tex
->texture
, &ntex
->instr
);
463 ntex
->texture_array_size
= tex
->texture_array_size
;
465 ntex
->sampler_index
= tex
->sampler_index
;
467 ntex
->sampler
= clone_deref_var(state
, tex
->sampler
, &ntex
->instr
);
472 static nir_phi_instr
*
473 clone_phi(clone_state
*state
, const nir_phi_instr
*phi
, nir_block
*nblk
)
475 nir_phi_instr
*nphi
= nir_phi_instr_create(state
->ns
);
477 __clone_dst(state
, &nphi
->instr
, &nphi
->dest
, &phi
->dest
);
479 /* Cloning a phi node is a bit different from other instructions. The
480 * sources of phi instructions are the only time where we can use an SSA
481 * def before it is defined. In order to handle this, we just copy over
482 * the sources from the old phi instruction directly and then fix them up
483 * in a second pass once all the instrutions in the function have been
486 * In order to ensure that the copied sources (which are the same as the
487 * old phi instruction's sources for now) don't get inserted into the old
488 * shader's use-def lists, we have to add the phi instruction *before* we
489 * set up its sources.
491 nir_instr_insert_after_block(nblk
, &nphi
->instr
);
493 foreach_list_typed(nir_phi_src
, src
, node
, &phi
->srcs
) {
494 nir_phi_src
*nsrc
= ralloc(nphi
, nir_phi_src
);
496 /* Just copy the old source for now. */
497 memcpy(nsrc
, src
, sizeof(*src
));
499 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
500 * we have to set the parent_instr manually. It doesn't really matter
501 * when we do it, so we might as well do it here.
503 nsrc
->src
.parent_instr
= &nphi
->instr
;
505 /* Stash it in the list of phi sources. We'll walk this list and fix up
506 * sources at the very end of clone_function_impl.
508 list_add(&nsrc
->src
.use_link
, &state
->phi_srcs
);
510 exec_list_push_tail(&nphi
->srcs
, &nsrc
->node
);
516 static nir_jump_instr
*
517 clone_jump(clone_state
*state
, const nir_jump_instr
*jmp
)
519 nir_jump_instr
*njmp
= nir_jump_instr_create(state
->ns
, jmp
->type
);
524 static nir_call_instr
*
525 clone_call(clone_state
*state
, const nir_call_instr
*call
)
527 nir_function
*ncallee
= remap_global(state
, call
->callee
);
528 nir_call_instr
*ncall
= nir_call_instr_create(state
->ns
, ncallee
);
530 for (unsigned i
= 0; i
< ncall
->num_params
; i
++)
531 ncall
->params
[i
] = clone_deref_var(state
, call
->params
[i
], &ncall
->instr
);
533 ncall
->return_deref
= clone_deref_var(state
, call
->return_deref
,
540 clone_instr(clone_state
*state
, const nir_instr
*instr
)
542 switch (instr
->type
) {
543 case nir_instr_type_alu
:
544 return &clone_alu(state
, nir_instr_as_alu(instr
))->instr
;
545 case nir_instr_type_deref
:
546 return &clone_deref_instr(state
, nir_instr_as_deref(instr
))->instr
;
547 case nir_instr_type_intrinsic
:
548 return &clone_intrinsic(state
, nir_instr_as_intrinsic(instr
))->instr
;
549 case nir_instr_type_load_const
:
550 return &clone_load_const(state
, nir_instr_as_load_const(instr
))->instr
;
551 case nir_instr_type_ssa_undef
:
552 return &clone_ssa_undef(state
, nir_instr_as_ssa_undef(instr
))->instr
;
553 case nir_instr_type_tex
:
554 return &clone_tex(state
, nir_instr_as_tex(instr
))->instr
;
555 case nir_instr_type_phi
:
556 unreachable("Cannot clone phis with clone_instr");
557 case nir_instr_type_jump
:
558 return &clone_jump(state
, nir_instr_as_jump(instr
))->instr
;
559 case nir_instr_type_call
:
560 return &clone_call(state
, nir_instr_as_call(instr
))->instr
;
561 case nir_instr_type_parallel_copy
:
562 unreachable("Cannot clone parallel copies");
564 unreachable("bad instr type");
570 clone_block(clone_state
*state
, struct exec_list
*cf_list
, const nir_block
*blk
)
572 /* Don't actually create a new block. Just use the one from the tail of
573 * the list. NIR guarantees that the tail of the list is a block and that
574 * no two blocks are side-by-side in the IR; It should be empty.
577 exec_node_data(nir_block
, exec_list_get_tail(cf_list
), cf_node
.node
);
578 assert(nblk
->cf_node
.type
== nir_cf_node_block
);
579 assert(exec_list_is_empty(&nblk
->instr_list
));
581 /* We need this for phi sources */
582 add_remap(state
, nblk
, blk
);
584 nir_foreach_instr(instr
, blk
) {
585 if (instr
->type
== nir_instr_type_phi
) {
586 /* Phi instructions are a bit of a special case when cloning because
587 * we don't want inserting the instruction to automatically handle
588 * use/defs for us. Instead, we need to wait until all the
589 * blocks/instructions are in so that we can set their sources up.
591 clone_phi(state
, nir_instr_as_phi(instr
), nblk
);
593 nir_instr
*ninstr
= clone_instr(state
, instr
);
594 nir_instr_insert_after_block(nblk
, ninstr
);
602 clone_cf_list(clone_state
*state
, struct exec_list
*dst
,
603 const struct exec_list
*list
);
606 clone_if(clone_state
*state
, struct exec_list
*cf_list
, const nir_if
*i
)
608 nir_if
*ni
= nir_if_create(state
->ns
);
610 __clone_src(state
, ni
, &ni
->condition
, &i
->condition
);
612 nir_cf_node_insert_end(cf_list
, &ni
->cf_node
);
614 clone_cf_list(state
, &ni
->then_list
, &i
->then_list
);
615 clone_cf_list(state
, &ni
->else_list
, &i
->else_list
);
621 clone_loop(clone_state
*state
, struct exec_list
*cf_list
, const nir_loop
*loop
)
623 nir_loop
*nloop
= nir_loop_create(state
->ns
);
625 nir_cf_node_insert_end(cf_list
, &nloop
->cf_node
);
627 clone_cf_list(state
, &nloop
->body
, &loop
->body
);
632 /* clone list of nir_cf_node: */
634 clone_cf_list(clone_state
*state
, struct exec_list
*dst
,
635 const struct exec_list
*list
)
637 foreach_list_typed(nir_cf_node
, cf
, node
, list
) {
639 case nir_cf_node_block
:
640 clone_block(state
, dst
, nir_cf_node_as_block(cf
));
643 clone_if(state
, dst
, nir_cf_node_as_if(cf
));
645 case nir_cf_node_loop
:
646 clone_loop(state
, dst
, nir_cf_node_as_loop(cf
));
649 unreachable("bad cf type");
654 /* After we've cloned almost everything, we have to walk the list of phi
655 * sources and fix them up. Thanks to loops, the block and SSA value for a
656 * phi source may not be defined when we first encounter it. Instead, we
657 * add it to the phi_srcs list and we fix it up here.
660 fixup_phi_srcs(clone_state
*state
)
662 list_for_each_entry_safe(nir_phi_src
, src
, &state
->phi_srcs
, src
.use_link
) {
663 src
->pred
= remap_local(state
, src
->pred
);
665 /* Remove from this list */
666 list_del(&src
->src
.use_link
);
668 if (src
->src
.is_ssa
) {
669 src
->src
.ssa
= remap_local(state
, src
->src
.ssa
);
670 list_addtail(&src
->src
.use_link
, &src
->src
.ssa
->uses
);
672 src
->src
.reg
.reg
= remap_reg(state
, src
->src
.reg
.reg
);
673 list_addtail(&src
->src
.use_link
, &src
->src
.reg
.reg
->uses
);
676 assert(list_empty(&state
->phi_srcs
));
680 nir_cf_list_clone(nir_cf_list
*dst
, nir_cf_list
*src
, nir_cf_node
*parent
,
681 struct hash_table
*remap_table
)
683 exec_list_make_empty(&dst
->list
);
684 dst
->impl
= src
->impl
;
686 if (exec_list_is_empty(&src
->list
))
690 init_clone_state(&state
, remap_table
, false, true);
692 /* We use the same shader */
693 state
.ns
= src
->impl
->function
->shader
;
695 /* The control-flow code assumes that the list of cf_nodes always starts
696 * and ends with a block. We start by adding an empty block.
698 nir_block
*nblk
= nir_block_create(state
.ns
);
699 nblk
->cf_node
.parent
= parent
;
700 exec_list_push_tail(&dst
->list
, &nblk
->cf_node
.node
);
702 clone_cf_list(&state
, &dst
->list
, &src
->list
);
704 fixup_phi_srcs(&state
);
707 static nir_function_impl
*
708 clone_function_impl(clone_state
*state
, const nir_function_impl
*fi
)
710 nir_function_impl
*nfi
= nir_function_impl_create_bare(state
->ns
);
712 clone_var_list(state
, &nfi
->locals
, &fi
->locals
);
713 clone_reg_list(state
, &nfi
->registers
, &fi
->registers
);
714 nfi
->reg_alloc
= fi
->reg_alloc
;
716 nfi
->num_params
= fi
->num_params
;
717 nfi
->params
= ralloc_array(state
->ns
, nir_variable
*, fi
->num_params
);
718 for (unsigned i
= 0; i
< fi
->num_params
; i
++) {
719 nfi
->params
[i
] = clone_variable(state
, fi
->params
[i
]);
722 nfi
->return_var
= clone_variable(state
, fi
->return_var
);
724 assert(list_empty(&state
->phi_srcs
));
726 clone_cf_list(state
, &nfi
->body
, &fi
->body
);
728 fixup_phi_srcs(state
);
730 /* All metadata is invalidated in the cloning process */
731 nfi
->valid_metadata
= 0;
737 nir_function_impl_clone(const nir_function_impl
*fi
)
740 init_clone_state(&state
, NULL
, false, false);
742 /* We use the same shader */
743 state
.ns
= fi
->function
->shader
;
745 nir_function_impl
*nfi
= clone_function_impl(&state
, fi
);
747 free_clone_state(&state
);
752 static nir_function
*
753 clone_function(clone_state
*state
, const nir_function
*fxn
, nir_shader
*ns
)
755 assert(ns
== state
->ns
);
756 nir_function
*nfxn
= nir_function_create(ns
, fxn
->name
);
758 /* Needed for call instructions */
759 add_remap(state
, nfxn
, fxn
);
761 nfxn
->num_params
= fxn
->num_params
;
762 nfxn
->params
= ralloc_array(state
->ns
, nir_parameter
, fxn
->num_params
);
763 memcpy(nfxn
->params
, fxn
->params
, sizeof(nir_parameter
) * fxn
->num_params
);
765 nfxn
->return_type
= fxn
->return_type
;
767 /* At first glance, it looks like we should clone the function_impl here.
768 * However, call instructions need to be able to reference at least the
769 * function and those will get processed as we clone the function_impls.
770 * We stop here and do function_impls as a second pass.
777 nir_shader_clone(void *mem_ctx
, const nir_shader
*s
)
780 init_clone_state(&state
, NULL
, true, false);
782 nir_shader
*ns
= nir_shader_create(mem_ctx
, s
->info
.stage
, s
->options
, NULL
);
785 clone_var_list(&state
, &ns
->uniforms
, &s
->uniforms
);
786 clone_var_list(&state
, &ns
->inputs
, &s
->inputs
);
787 clone_var_list(&state
, &ns
->outputs
, &s
->outputs
);
788 clone_var_list(&state
, &ns
->shared
, &s
->shared
);
789 clone_var_list(&state
, &ns
->globals
, &s
->globals
);
790 clone_var_list(&state
, &ns
->system_values
, &s
->system_values
);
792 /* Go through and clone functions */
793 foreach_list_typed(nir_function
, fxn
, node
, &s
->functions
)
794 clone_function(&state
, fxn
, ns
);
796 /* Only after all functions are cloned can we clone the actual function
797 * implementations. This is because nir_call_instrs need to reference the
798 * functions of other functions and we don't know what order the functions
799 * will have in the list.
801 nir_foreach_function(fxn
, s
) {
802 nir_function
*nfxn
= remap_global(&state
, fxn
);
803 nfxn
->impl
= clone_function_impl(&state
, fxn
->impl
);
804 nfxn
->impl
->function
= nfxn
;
807 clone_reg_list(&state
, &ns
->registers
, &s
->registers
);
808 ns
->reg_alloc
= s
->reg_alloc
;
811 ns
->info
.name
= ralloc_strdup(ns
, ns
->info
.name
);
813 ns
->info
.label
= ralloc_strdup(ns
, ns
->info
.label
);
815 ns
->num_inputs
= s
->num_inputs
;
816 ns
->num_uniforms
= s
->num_uniforms
;
817 ns
->num_outputs
= s
->num_outputs
;
818 ns
->num_shared
= s
->num_shared
;
820 free_clone_state(&state
);