X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir_clone.c;h=7236f0872a71b73e894a5bf2a033eda587e7646a;hb=d80c342d898275cbd6266c37e70dc422590d7d8c;hp=e889f19d24e5b8aa3a10cd3a16edb2e3abddcc40;hpb=3663a2397e47da9b766b0c4239a8b74ac77b5d04;p=mesa.git diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index e889f19d24e..7236f0872a7 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -22,7 +22,7 @@ */ #include "nir.h" -#include "nir_control_flow_private.h" +#include "nir_control_flow.h" /* Secret Decoder Ring: * clone_foo(): @@ -35,6 +35,13 @@ typedef struct { /* True if we are cloning an entire shader. */ bool global_clone; + /* If true allows the clone operation to fall back to the original pointer + * if no clone pointer is found in the remap table. This allows us to + * clone a loop body without having to add srcs from outside the loop to + * the remap table. This is useful for loop unrolling. + */ + bool allow_remap_fallback; + /* maps orig ptr -> cloned ptr: */ struct hash_table *remap_table; @@ -46,11 +53,19 @@ typedef struct { } clone_state; static void -init_clone_state(clone_state *state, bool global) +init_clone_state(clone_state *state, struct hash_table *remap_table, + bool global, bool allow_remap_fallback) { state->global_clone = global; - state->remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + state->allow_remap_fallback = allow_remap_fallback; + + if (remap_table) { + state->remap_table = remap_table; + } else { + state->remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + } + list_inithead(&state->phi_srcs); } @@ -72,9 +87,10 @@ _lookup_ptr(clone_state *state, const void *ptr, bool global) return (void *)ptr; entry = _mesa_hash_table_search(state->remap_table, ptr); - assert(entry && "Failed to find pointer!"); - if (!entry) - return NULL; + if (!entry) { + assert(state->allow_remap_fallback); + return (void *)ptr; + } return entry->data; } @@ -114,7 +130,7 @@ nir_constant_clone(const nir_constant *c, nir_variable *nvar) { nir_constant *nc = ralloc(nvar, nir_constant); - nc->value = c->value; + memcpy(nc->values, c->values, sizeof(nc->values)); nc->num_elements = c->num_elements; nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements); for (unsigned i = 0; i < c->num_elements; i++) { @@ -124,7 +140,7 @@ nir_constant_clone(const nir_constant *c, nir_variable *nvar) return nc; } -/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid +/* NOTE: for cloning nir_variables, bypass nir_variable_create to avoid * having to deal with locals and globals separately: */ nir_variable * @@ -169,7 +185,7 @@ clone_var_list(clone_state *state, struct exec_list *dst, } } -/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create() +/* NOTE: for cloning nir_registers, bypass nir_global/local_reg_create() * to avoid having to deal with locals and globals separately: */ static nir_register * @@ -179,6 +195,7 @@ clone_register(clone_state *state, const nir_register *reg) add_remap(state, nreg, reg); nreg->num_components = reg->num_components; + nreg->bit_size = reg->bit_size; nreg->num_array_elems = reg->num_array_elems; nreg->index = reg->index; nreg->name = ralloc_strdup(nreg, reg->name); @@ -329,6 +346,46 @@ clone_alu(clone_state *state, const nir_alu_instr *alu) return nalu; } +static nir_deref_instr * +clone_deref_instr(clone_state *state, const nir_deref_instr *deref) +{ + nir_deref_instr *nderef = + nir_deref_instr_create(state->ns, deref->deref_type); + + __clone_dst(state, &nderef->instr, &nderef->dest, &deref->dest); + + nderef->mode = deref->mode; + nderef->type = deref->type; + + if (deref->deref_type == nir_deref_type_var) { + nderef->var = remap_var(state, deref->var); + return nderef; + } + + __clone_src(state, &nderef->instr, &nderef->parent, &deref->parent); + + switch (deref->deref_type) { + case nir_deref_type_struct: + nderef->strct.index = deref->strct.index; + break; + + case nir_deref_type_array: + __clone_src(state, &nderef->instr, + &nderef->arr.index, &deref->arr.index); + break; + + case nir_deref_type_array_wildcard: + case nir_deref_type_cast: + /* Nothing to do */ + break; + + default: + unreachable("Invalid instruction deref type"); + } + + return nderef; +} + static nir_intrinsic_instr * clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr) { @@ -485,6 +542,8 @@ clone_instr(clone_state *state, const nir_instr *instr) switch (instr->type) { case nir_instr_type_alu: return &clone_alu(state, nir_instr_as_alu(instr))->instr; + case nir_instr_type_deref: + return &clone_deref_instr(state, nir_instr_as_deref(instr))->instr; case nir_instr_type_intrinsic: return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr; case nir_instr_type_load_const: @@ -522,7 +581,7 @@ clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk) /* We need this for phi sources */ add_remap(state, nblk, blk); - nir_foreach_instr(blk, instr) { + nir_foreach_instr(instr, blk) { if (instr->type == nir_instr_type_phi) { /* Phi instructions are a bit of a special case when cloning because * we don't want inserting the instruction to automatically handle @@ -592,6 +651,59 @@ clone_cf_list(clone_state *state, struct exec_list *dst, } } +/* After we've cloned almost everything, we have to walk the list of phi + * sources and fix them up. Thanks to loops, the block and SSA value for a + * phi source may not be defined when we first encounter it. Instead, we + * add it to the phi_srcs list and we fix it up here. + */ +static void +fixup_phi_srcs(clone_state *state) +{ + list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { + src->pred = remap_local(state, src->pred); + + /* Remove from this list */ + list_del(&src->src.use_link); + + if (src->src.is_ssa) { + src->src.ssa = remap_local(state, src->src.ssa); + list_addtail(&src->src.use_link, &src->src.ssa->uses); + } else { + src->src.reg.reg = remap_reg(state, src->src.reg.reg); + list_addtail(&src->src.use_link, &src->src.reg.reg->uses); + } + } + assert(list_empty(&state->phi_srcs)); +} + +void +nir_cf_list_clone(nir_cf_list *dst, nir_cf_list *src, nir_cf_node *parent, + struct hash_table *remap_table) +{ + exec_list_make_empty(&dst->list); + dst->impl = src->impl; + + if (exec_list_is_empty(&src->list)) + return; + + clone_state state; + init_clone_state(&state, remap_table, false, true); + + /* We use the same shader */ + state.ns = src->impl->function->shader; + + /* The control-flow code assumes that the list of cf_nodes always starts + * and ends with a block. We start by adding an empty block. + */ + nir_block *nblk = nir_block_create(state.ns); + nblk->cf_node.parent = parent; + exec_list_push_tail(&dst->list, &nblk->cf_node.node); + + clone_cf_list(&state, &dst->list, &src->list); + + fixup_phi_srcs(&state); +} + static nir_function_impl * clone_function_impl(clone_state *state, const nir_function_impl *fi) { @@ -613,21 +725,7 @@ clone_function_impl(clone_state *state, const nir_function_impl *fi) clone_cf_list(state, &nfi->body, &fi->body); - /* After we've cloned almost everything, we have to walk the list of phi - * sources and fix them up. Thanks to loops, the block and SSA value for a - * phi source may not be defined when we first encounter it. Instead, we - * add it to the phi_srcs list and we fix it up here. - */ - list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { - src->pred = remap_local(state, src->pred); - assert(src->src.is_ssa); - src->src.ssa = remap_local(state, src->src.ssa); - - /* Remove from this list and place in the uses of the SSA def */ - list_del(&src->src.use_link); - list_addtail(&src->src.use_link, &src->src.ssa->uses); - } - assert(list_empty(&state->phi_srcs)); + fixup_phi_srcs(state); /* All metadata is invalidated in the cloning process */ nfi->valid_metadata = 0; @@ -639,7 +737,7 @@ nir_function_impl * nir_function_impl_clone(const nir_function_impl *fi) { clone_state state; - init_clone_state(&state, false); + init_clone_state(&state, NULL, false, false); /* We use the same shader */ state.ns = fi->function->shader; @@ -668,7 +766,7 @@ clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns) /* At first glance, it looks like we should clone the function_impl here. * However, call instructions need to be able to reference at least the - * function and those will get processed as we clone the function_impl's. + * function and those will get processed as we clone the function_impls. * We stop here and do function_impls as a second pass. */ @@ -679,9 +777,9 @@ nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s) { clone_state state; - init_clone_state(&state, true); + init_clone_state(&state, NULL, true, false); - nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options); + nir_shader *ns = nir_shader_create(mem_ctx, s->info.stage, s->options, NULL); state.ns = ns; clone_var_list(&state, &ns->uniforms, &s->uniforms); @@ -696,11 +794,11 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) clone_function(&state, fxn, ns); /* Only after all functions are cloned can we clone the actual function - * implementations. This is because nir_call_instr's need to reference the + * implementations. This is because nir_call_instrs need to reference the * functions of other functions and we don't know what order the functions * will have in the list. */ - nir_foreach_function(s, fxn) { + nir_foreach_function(fxn, s) { nir_function *nfxn = remap_global(&state, fxn); nfxn->impl = clone_function_impl(&state, fxn->impl); nfxn->impl->function = nfxn; @@ -718,6 +816,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) ns->num_uniforms = s->num_uniforms; ns->num_outputs = s->num_outputs; ns->num_shared = s->num_shared; + ns->lowered_derefs = s->lowered_derefs; free_clone_state(&state);