X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir_opt_constant_folding.c;h=9dcb464c72452f1fcb94fea128fba5c7c7d48500;hb=bc123c396a99b2f6ff845792374d6a8d5de5d15e;hp=fe34f4ade8b7de030e8f4e9e1aa3df9d704b1ce6;hpb=ed5af9437344f932f8cbebfa776f873d16a8aae1;p=mesa.git

diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
index fe34f4ade8b..9dcb464c724 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -33,15 +33,16 @@
  */
 
 struct constant_fold_state {
-   void *mem_ctx;
-   nir_function_impl *impl;
-   bool progress;
+   nir_shader *shader;
+   unsigned execution_mode;
+   bool has_load_constant;
+   bool has_indirect_load_const;
 };
 
 static bool
-constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
+constant_fold_alu_instr(struct constant_fold_state *state, nir_alu_instr *instr)
 {
-   nir_const_value src[4];
+   nir_const_value src[NIR_MAX_VEC_COMPONENTS][NIR_MAX_VEC_COMPONENTS];
 
    if (!instr->dest.dest.is_ssa)
       return false;
@@ -64,9 +65,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
          return false;
 
       if (bit_size == 0 &&
-          !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_sizes[i])) {
+          !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_types[i]))
          bit_size = instr->src[i].src.ssa->bit_size;
-      }
 
       nir_instr *src_instr = instr->src[i].src.ssa->parent_instr;
 
@@ -76,20 +76,7 @@
       for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i);
            j++) {
-         switch(load_const->def.bit_size) {
-         case 64:
-            src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]];
-            break;
-         case 32:
-            src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]];
-            break;
-         case 16:
-            src[i].u16[j] = load_const->value.u16[instr->src[i].swizzle[j]];
-            break;
-         case 8:
-            src[i].u8[j] = load_const->value.u8[instr->src[i].swizzle[j]];
-            break;
-         }
+         src[i][j] = load_const->value[instr->src[i].swizzle[j]];
       }
 
       /* We shouldn't have any source modifiers in the optimization loop. */
@@ -102,16 +89,20 @@
    /* We shouldn't have any saturate modifiers in the optimization loop.
     */
    assert(!instr->dest.saturate);
 
-   nir_const_value dest =
-      nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,
-                            bit_size, src);
+   nir_const_value dest[NIR_MAX_VEC_COMPONENTS];
+   nir_const_value *srcs[NIR_MAX_VEC_COMPONENTS];
+   memset(dest, 0, sizeof(dest));
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; ++i)
+      srcs[i] = src[i];
+   nir_eval_const_opcode(instr->op, dest, instr->dest.dest.ssa.num_components,
+                         bit_size, srcs, state->execution_mode);
 
    nir_load_const_instr *new_instr =
-      nir_load_const_instr_create(mem_ctx,
+      nir_load_const_instr_create(state->shader,
                                   instr->dest.dest.ssa.num_components,
                                   instr->dest.dest.ssa.bit_size);
 
-   new_instr->value = dest;
+   memcpy(new_instr->value, dest, sizeof(*new_instr->value) * new_instr->def.num_components);
 
    nir_instr_insert_before(&instr->instr, &new_instr->instr);
@@ -125,102 +116,88 @@
 }
 
 static bool
-constant_fold_deref(nir_instr *instr, nir_deref_var *deref)
-{
-   bool progress = false;
-
-   for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
-      if (tail->deref_type != nir_deref_type_array)
-         continue;
-
-      nir_deref_array *arr = nir_deref_as_array(tail);
-
-      if (arr->deref_array_type == nir_deref_array_type_indirect &&
-          arr->indirect.is_ssa &&
-          arr->indirect.ssa->parent_instr->type == nir_instr_type_load_const) {
-         nir_load_const_instr *indirect =
-            nir_instr_as_load_const(arr->indirect.ssa->parent_instr);
-
-         arr->base_offset += indirect->value.u32[0];
-
-         /* Clear out the source */
-         nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL));
-
-         arr->deref_array_type = nir_deref_array_type_direct;
-
-         progress = true;
-      }
-   }
-
-   return progress;
-}
-
-static bool
-constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
+constant_fold_intrinsic_instr(struct constant_fold_state *state, nir_intrinsic_instr *instr)
 {
    bool progress = false;
 
-   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
-   for (unsigned i = 0; i < num_vars; i++) {
-      progress |= constant_fold_deref(&instr->instr, instr->variables[i]);
-   }
-
-   if (instr->intrinsic == nir_intrinsic_discard_if) {
-      nir_const_value *src_val = nir_src_as_const_value(instr->src[0]);
-      if (src_val && src_val->u32[0] == NIR_FALSE) {
+   if ((instr->intrinsic == nir_intrinsic_demote_if ||
+        instr->intrinsic == nir_intrinsic_discard_if) &&
+       nir_src_is_const(instr->src[0])) {
+      if (nir_src_as_bool(instr->src[0])) {
+         nir_intrinsic_op op = instr->intrinsic == nir_intrinsic_discard_if ?
+                               nir_intrinsic_discard :
+                               nir_intrinsic_demote;
+         nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(state->shader, op);
+         nir_instr_insert_before(&instr->instr, &new_instr->instr);
          nir_instr_remove(&instr->instr);
          progress = true;
-      } else if (src_val && src_val->u32[0] == NIR_TRUE) {
-         /* This method of getting a nir_shader * from a nir_instr is
-          * admittedly gross, but given the rarity of hitting this case I think
-          * it's preferable to plumbing an otherwise unused nir_shader *
-          * parameter through four functions to get here.
-          */
-         nir_cf_node *cf_node = &instr->instr.block->cf_node;
-         nir_function_impl *impl = nir_cf_node_get_function(cf_node);
-         nir_shader *shader = impl->function->shader;
-
-         nir_intrinsic_instr *discard =
-            nir_intrinsic_instr_create(shader, nir_intrinsic_discard);
-         nir_instr_insert_before(&instr->instr, &discard->instr);
+      } else {
+         /* We're not discarding, just delete the instruction */
          nir_instr_remove(&instr->instr);
          progress = true;
       }
-   }
+   } else if (instr->intrinsic == nir_intrinsic_load_constant) {
+      state->has_load_constant = true;
 
-   return progress;
-}
+      if (!nir_src_is_const(instr->src[0])) {
+         state->has_indirect_load_const = true;
+         return progress;
+      }
 
-static bool
-constant_fold_tex_instr(nir_tex_instr *instr)
-{
-   bool progress = false;
+      unsigned offset = nir_src_as_uint(instr->src[0]);
+      unsigned base = nir_intrinsic_base(instr);
+      unsigned range = nir_intrinsic_range(instr);
+      assert(base + range <= state->shader->constant_data_size);
+
+      nir_instr *new_instr = NULL;
+      if (offset >= range) {
+         nir_ssa_undef_instr *undef =
+            nir_ssa_undef_instr_create(state->shader,
+                                       instr->num_components,
+                                       instr->dest.ssa.bit_size);
+
+         nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&undef->def));
+         new_instr = &undef->instr;
+      } else {
+         nir_load_const_instr *load_const =
+            nir_load_const_instr_create(state->shader,
+                                        instr->num_components,
+                                        instr->dest.ssa.bit_size);
+
+         uint8_t *data = (uint8_t*)state->shader->constant_data + base;
+         for (unsigned i = 0; i < instr->num_components; i++) {
+            unsigned bytes = instr->dest.ssa.bit_size / 8;
+            bytes = MIN2(bytes, range - offset);
+
+            memcpy(&load_const->value[i].u64, data + offset, bytes);
+            offset += bytes;
+         }
 
-   if (instr->texture)
-      progress |= constant_fold_deref(&instr->instr, instr->texture);
+         nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load_const->def));
+         new_instr = &load_const->instr;
+      }
 
-   if (instr->sampler)
-      progress |= constant_fold_deref(&instr->instr, instr->sampler);
+      nir_instr_insert_before(&instr->instr, new_instr);
+      nir_instr_remove(&instr->instr);
+      progress = true;
+   }
 
    return progress;
 }
 
 static bool
-constant_fold_block(nir_block *block, void *mem_ctx)
+constant_fold_block(struct constant_fold_state *state, nir_block *block)
 {
    bool progress = false;
 
    nir_foreach_instr_safe(instr, block) {
       switch (instr->type) {
       case nir_instr_type_alu:
-         progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx);
+         progress |= constant_fold_alu_instr(state, nir_instr_as_alu(instr));
          break;
       case nir_instr_type_intrinsic:
          progress |=
-            constant_fold_intrinsic_instr(nir_instr_as_intrinsic(instr));
-         break;
-      case nir_instr_type_tex:
-         progress |= constant_fold_tex_instr(nir_instr_as_tex(instr));
+            constant_fold_intrinsic_instr(state, nir_instr_as_intrinsic(instr));
          break;
       default:
          /* Don't know how to constant fold */
@@ -232,18 +209,20 @@
 }
 
 static bool
-nir_opt_constant_folding_impl(nir_function_impl *impl)
+nir_opt_constant_folding_impl(struct constant_fold_state *state, nir_function_impl *impl)
 {
-   void *mem_ctx = ralloc_parent(impl);
    bool progress = false;
 
    nir_foreach_block(block, impl) {
-      progress |= constant_fold_block(block, mem_ctx);
+      progress |= constant_fold_block(state, block);
    }
 
-   if (progress)
+   if (progress) {
       nir_metadata_preserve(impl, nir_metadata_block_index |
                                   nir_metadata_dominance);
+   } else {
+      nir_metadata_preserve(impl, nir_metadata_all);
+   }
 
    return progress;
 }
@@ -252,10 +231,25 @@ bool
 nir_opt_constant_folding(nir_shader *shader)
 {
    bool progress = false;
+   struct constant_fold_state state;
+   state.shader = shader;
+   state.execution_mode = shader->info.float_controls_execution_mode;
+   state.has_load_constant = false;
+   state.has_indirect_load_const = false;
 
    nir_foreach_function(function, shader) {
       if (function->impl)
-         progress |= nir_opt_constant_folding_impl(function->impl);
+         progress |= nir_opt_constant_folding_impl(&state, function->impl);
+   }
+
+   /* Don't free the constant data if there were no constant loads: the loads
+    * may have been lowered to load_ubo and the data may still be in use.
+    */
+   if (state.has_load_constant && !state.has_indirect_load_const &&
+       shader->constant_data_size) {
+      ralloc_free(shader->constant_data);
+      shader->constant_data = NULL;
+      shader->constant_data_size = 0;
    }
 
    return progress;
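
A minimal sketch of how the reworked pass is typically driven, assuming only
the bool-returning nir_opt_constant_folding() entry point shown above; the
optimize_shader() wrapper and the bare fixed-point loop are illustrative, not
part of NIR's API (drivers normally interleave several passes in such a loop):

   #include "nir.h"

   static void
   optimize_shader(nir_shader *shader)
   {
      bool progress;
      do {
         progress = false;
         /* Folds ALU instructions with constant sources, demote_if/discard_if
          * intrinsics with constant conditions, and load_constant intrinsics
          * with constant offsets; returns true if anything changed.
          */
         progress |= nir_opt_constant_folding(shader);
      } while (progress);
   }

Because the pass records whether every load_constant it saw had a constant
offset, a shader that reaches this fixed point with all such loads folded also
has its now-unreferenced shader->constant_data freed, as in the final hunk.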