From 3cc914921e69b4458d4480a64aafe0f2823c76e2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 23 Sep 2019 14:36:32 -0700 Subject: [PATCH] nir: Factor out most of the algebraic passes C code to .c/.h. Working on the algebraic implementation, I was being driven nuts by my editor not highlighting and handling indentation for the C code. It turns out that it's basically not pass-specific code, and we can move it over to the relevant .c file. Replaces 30KB of code with 34KB of data on my i965 build. No perf diff on shader-db (n=3) Reviewed-by: Ian Romanick Reviewed-by: Connor Abbott --- src/compiler/nir/nir_algebraic.py | 168 ++++-------------------------- src/compiler/nir/nir_search.c | 126 ++++++++++++++++++++++ src/compiler/nir/nir_search.h | 25 +++++ 3 files changed, 173 insertions(+), 146 deletions(-) diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py index 1645ec50fcd..d8d0a95a6d3 100644 --- a/src/compiler/nir/nir_algebraic.py +++ b/src/compiler/nir/nir_algebraic.py @@ -1050,30 +1050,6 @@ _algebraic_pass_template = mako.template.Template(""" % endfor */ -#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS -#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS - -struct transform { - const nir_search_expression *search; - const nir_search_value *replace; - unsigned condition_offset; -}; - -struct per_op_table { - const uint16_t *filter; - unsigned num_filtered_states; - const uint16_t *table; -}; - -/* Note: these must match the start states created in - * TreeAutomaton._build_table() - */ - -/* WILDCARD_STATE = 0 is set by zeroing the state array */ -static const uint16_t CONST_STATE = 1; - -#endif - <% cache = {} %> % for xform in xforms: ${xform.search.render(cache)} @@ -1114,129 +1090,25 @@ static const struct per_op_table ${pass_name}_table[nir_num_search_ops] = { % endfor }; -static void -${pass_name}_pre_block(nir_block *block, uint16_t *states) -{ - nir_foreach_instr(instr, block) { - switch (instr->type) { - case nir_instr_type_alu: { - nir_alu_instr *alu = nir_instr_as_alu(instr); - nir_op op = alu->op; - uint16_t search_op = nir_search_op_for_nir_op(op); - const struct per_op_table *tbl = &${pass_name}_table[search_op]; - if (tbl->num_filtered_states == 0) - continue; - - /* Calculate the index into the transition table. Note the index - * calculated must match the iteration order of Python's - * itertools.product(), which was used to emit the transition - * table. - */ - uint16_t index = 0; - for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) { - index *= tbl->num_filtered_states; - index += tbl->filter[states[alu->src[i].src.ssa->index]]; - } - states[alu->dest.dest.ssa.index] = tbl->table[index]; - break; - } - - case nir_instr_type_load_const: { - nir_load_const_instr *load_const = nir_instr_as_load_const(instr); - states[load_const->def.index] = CONST_STATE; - break; - } - - default: - break; - } - } -} - -static bool -${pass_name}_block(nir_builder *build, nir_block *block, - struct hash_table *range_ht, - const uint16_t *states, const bool *condition_flags) -{ - bool progress = false; - const unsigned execution_mode = build->shader->info.float_controls_execution_mode; - - nir_foreach_instr_reverse_safe(instr, block) { - if (instr->type != nir_instr_type_alu) - continue; - - nir_alu_instr *alu = nir_instr_as_alu(instr); - if (!alu->dest.dest.is_ssa) - continue; - - unsigned bit_size = alu->dest.dest.ssa.bit_size; - const bool ignore_inexact = - nir_is_float_control_signed_zero_inf_nan_preserve(execution_mode, bit_size) || - nir_is_denorm_flush_to_zero(execution_mode, bit_size); - - switch (states[alu->dest.dest.ssa.index]) { +const struct transform *${pass_name}_transforms[] = { % for i in range(len(automaton.state_patterns)): - case ${i}: - % if automaton.state_patterns[i]: - for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_state${i}_xforms); i++) { - const struct transform *xform = &${pass_name}_state${i}_xforms[i]; - if (condition_flags[xform->condition_offset] && - !(xform->search->inexact && ignore_inexact) && - nir_replace_instr(build, alu, range_ht, xform->search, xform->replace)) { - _mesa_hash_table_clear(range_ht, NULL); - progress = true; - break; - } - } - % endif - break; + % if automaton.state_patterns[i]: + ${pass_name}_state${i}_xforms, + % else: + NULL, + % endif % endfor - default: assert(0); - } - } - - return progress; -} - -static bool -${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags) -{ - bool progress = false; - - nir_builder build; - nir_builder_init(&build, impl); - - /* Note: it's important here that we're allocating a zeroed array, since - * state 0 is the default state, which means we don't have to visit - * anything other than constants and ALU instructions. - */ - uint16_t *states = calloc(impl->ssa_alloc, sizeof(*states)); - - struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL); - - nir_foreach_block(block, impl) { - ${pass_name}_pre_block(block, states); - } - - nir_foreach_block_reverse(block, impl) { - progress |= ${pass_name}_block(&build, block, range_ht, states, condition_flags); - } - - ralloc_free(range_ht); - free(states); - - if (progress) { - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - } else { -#ifndef NDEBUG - impl->valid_metadata &= ~nir_metadata_not_properly_reset; -#endif - } - - return progress; -} +}; +const uint16_t ${pass_name}_transform_counts[] = { +% for i in range(len(automaton.state_patterns)): + % if automaton.state_patterns[i]: + (uint16_t)ARRAY_SIZE(${pass_name}_state${i}_xforms), + % else: + 0, + % endif +% endfor +}; bool ${pass_name}(nir_shader *shader) @@ -1253,8 +1125,12 @@ ${pass_name}(nir_shader *shader) % endfor nir_foreach_function(function, shader) { - if (function->impl) - progress |= ${pass_name}_impl(function->impl, condition_flags); + if (function->impl) { + progress |= nir_algebraic_impl(function->impl, condition_flags, + ${pass_name}_transforms, + ${pass_name}_transform_counts, + ${pass_name}_table); + } } return progress; diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c index d126319b256..db2a606c7b6 100644 --- a/src/compiler/nir/nir_search.c +++ b/src/compiler/nir/nir_search.c @@ -692,3 +692,129 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr, return ssa_val; } + +static void +nir_algebraic_automaton(nir_block *block, uint16_t *states, + const struct per_op_table *pass_op_table) +{ + nir_foreach_instr(instr, block) { + switch (instr->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(instr); + nir_op op = alu->op; + uint16_t search_op = nir_search_op_for_nir_op(op); + const struct per_op_table *tbl = &pass_op_table[search_op]; + if (tbl->num_filtered_states == 0) + continue; + + /* Calculate the index into the transition table. Note the index + * calculated must match the iteration order of Python's + * itertools.product(), which was used to emit the transition + * table. + */ + uint16_t index = 0; + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) { + index *= tbl->num_filtered_states; + index += tbl->filter[states[alu->src[i].src.ssa->index]]; + } + states[alu->dest.dest.ssa.index] = tbl->table[index]; + break; + } + + case nir_instr_type_load_const: { + nir_load_const_instr *load_const = nir_instr_as_load_const(instr); + states[load_const->def.index] = CONST_STATE; + break; + } + + default: + break; + } + } +} + +static bool +nir_algebraic_block(nir_builder *build, nir_block *block, + struct hash_table *range_ht, + const bool *condition_flags, + const struct transform **transforms, + const uint16_t *transform_counts, + const uint16_t *states) +{ + bool progress = false; + const unsigned execution_mode = build->shader->info.float_controls_execution_mode; + + nir_foreach_instr_reverse_safe(instr, block) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (!alu->dest.dest.is_ssa) + continue; + + unsigned bit_size = alu->dest.dest.ssa.bit_size; + const bool ignore_inexact = + nir_is_float_control_signed_zero_inf_nan_preserve(execution_mode, bit_size) || + nir_is_denorm_flush_to_zero(execution_mode, bit_size); + + int xform_idx = states[alu->dest.dest.ssa.index]; + for (uint16_t i = 0; i < transform_counts[xform_idx]; i++) { + const struct transform *xform = &transforms[xform_idx][i]; + if (condition_flags[xform->condition_offset] && + !(xform->search->inexact && ignore_inexact) && + nir_replace_instr(build, alu, range_ht, + xform->search, xform->replace)) { + _mesa_hash_table_clear(range_ht, NULL); + progress = true; + break; + } + } + } + + return progress; +} + +bool +nir_algebraic_impl(nir_function_impl *impl, + const bool *condition_flags, + const struct transform **transforms, + const uint16_t *transform_counts, + const struct per_op_table *pass_op_table) +{ + bool progress = false; + + nir_builder build; + nir_builder_init(&build, impl); + + /* Note: it's important here that we're allocating a zeroed array, since + * state 0 is the default state, which means we don't have to visit + * anything other than constants and ALU instructions. + */ + uint16_t *states = calloc(impl->ssa_alloc, sizeof(*states)); + + struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL); + + nir_foreach_block(block, impl) { + nir_algebraic_automaton(block, states, pass_op_table); + } + + nir_foreach_block_reverse(block, impl) { + progress |= nir_algebraic_block(&build, block, range_ht, condition_flags, + transforms, transform_counts, + states); + } + + ralloc_free(range_ht); + free(states); + + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + } else { +#ifndef NDEBUG + impl->valid_metadata &= ~nir_metadata_not_properly_reset; +#endif + } + + return progress; +} diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h index c86efc5ba3f..c5d7ba3895e 100644 --- a/src/compiler/nir/nir_search.h +++ b/src/compiler/nir/nir_search.h @@ -163,6 +163,25 @@ typedef struct { bool (*cond)(nir_alu_instr *instr); } nir_search_expression; +struct per_op_table { + const uint16_t *filter; + unsigned num_filtered_states; + const uint16_t *table; +}; + +struct transform { + const nir_search_expression *search; + const nir_search_value *replace; + unsigned condition_offset; +}; + +/* Note: these must match the start states created in + * TreeAutomaton._build_table() + */ + +/* WILDCARD_STATE = 0 is set by zeroing the state array */ +static const uint16_t CONST_STATE = 1; + NIR_DEFINE_CAST(nir_search_value_as_variable, nir_search_value, nir_search_variable, value, type, nir_search_value_variable) @@ -178,5 +197,11 @@ nir_replace_instr(struct nir_builder *b, nir_alu_instr *instr, struct hash_table *range_ht, const nir_search_expression *search, const nir_search_value *replace); +bool +nir_algebraic_impl(nir_function_impl *impl, + const bool *condition_flags, + const struct transform **transforms, + const uint16_t *transform_counts, + const struct per_op_table *pass_op_table); #endif /* _NIR_SEARCH_ */ -- 2.30.2