From 96c32d77763c4b561f751ca360e6539a3c5e7f4d Mon Sep 17 00:00:00 2001
From: Caio Marcelo de Oliveira Filho
Date: Mon, 14 Jan 2019 15:28:33 -0800
Subject: [PATCH] nir/copy_prop_vars: handle load/store of vector elements

When a direct array deref is used on a vector type (for loads and
stores), copy_prop_vars is now smart enough to propagate the values it
knows about.

Given a 'vec4 v', storing to v[3] will update the copy entry for v and
is equivalent to a write to v.w.  Loading from v[1] will first check
whether there's a known value for v.y -- and drop the load in that
case.

Copy entries still always refer to the entire vector, so the operations
happen on the parent deref (the 'vector') and the values are adjusted
accordingly.  An entry may now contain not only a different SSA def for
each element, but also defs whose source component differs from the
component they are stored to, because stores to individual elements
always come from an SSA definition with a single component.

Tests related to these cases are now enabled.

v2: Instead of asserting on invalid indices, "load" an undef and remove
    the store.  (Jason)

v3: Merge the is_array_deref_of_vector cases into the regular code
    path.  Add a base_index parameter to value_set_from_value.  (code
    changes by Jason)

v4: Remove the get_entry_for_deref helper, which was now used only
    once.

Reviewed-by: Jason Ekstrand
---
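
Illustration (not part of the commit; git am ignores anything between the
'---' marker and the diffstat): a minimal, self-contained sketch of the
per-element bookkeeping described above.  The type and helper names
(ssa_def, entry_value, entry_store_element, entry_load_element) are
invented for the example and are much simpler than the real struct value
and copy_entry in nir_opt_copy_prop_vars.c; the sketch only shows how a
single-component store lands in one element of an entry and how a later
load of that element can be resolved from it.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_COMPS 4

/* Stand-in for an SSA definition. */
typedef struct {
   int id;
   unsigned num_components;
} ssa_def;

/* Per-element view of a copy entry: each element remembers which SSA def
 * it came from and which component of that def holds the data. */
struct entry_value {
   ssa_def *def[MAX_COMPS];
   unsigned component[MAX_COMPS];
};

/* Record a single-element store such as "v[3] = x".  The data lands in
 * element 'index' of the entry but always comes from component 0 of the
 * stored def, since element stores are single-component. */
static void
entry_store_element(struct entry_value *entry, unsigned index, ssa_def *src)
{
   assert(src->num_components == 1);
   entry->def[index] = src;
   entry->component[index] = 0;
}

/* Resolve a load such as "y = v[1]".  If the element is known, the load
 * can be dropped and replaced by (def, component); otherwise the original
 * load intrinsic has to stay. */
static bool
entry_load_element(const struct entry_value *entry, unsigned index,
                   ssa_def **def, unsigned *comp)
{
   if (entry->def[index] == NULL)
      return false;
   *def = entry->def[index];
   *comp = entry->component[index];
   return true;
}

int
main(void)
{
   struct entry_value v_entry = { { NULL } };
   ssa_def x = { 7, 1 };

   /* "v[3] = x" is tracked like a write to v.w. */
   entry_store_element(&v_entry, 3, &x);

   ssa_def *def;
   unsigned comp;
   if (entry_load_element(&v_entry, 3, &def, &comp))
      printf("load of v[3] folded to ssa_%d.%u\n", def->id, comp);
   if (!entry_load_element(&v_entry, 1, &def, &comp))
      printf("v[1] is unknown: keep the original load\n");

   return 0;
}
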
 src/compiler/nir/nir_opt_copy_prop_vars.c | 140 +++++++++++++++++-----
 src/compiler/nir/tests/vars_tests.cpp     |   8 +-
 2 files changed, 114 insertions(+), 34 deletions(-)

diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c
index e59cd18b98d..b95a455a109 100644
--- a/src/compiler/nir/nir_opt_copy_prop_vars.c
+++ b/src/compiler/nir/nir_opt_copy_prop_vars.c
@@ -288,6 +288,15 @@ copy_entry_remove(struct util_dynarray *copies,
    *entry = util_dynarray_pop(copies, struct copy_entry);
 }
 
+static bool
+is_array_deref_of_vector(nir_deref_instr *deref)
+{
+   if (deref->deref_type != nir_deref_type_array)
+      return false;
+   nir_deref_instr *parent = nir_deref_instr_parent(deref);
+   return glsl_type_is_vector(parent->type);
+}
+
 static struct copy_entry *
 lookup_entry_for_deref(struct util_dynarray *copies,
                        nir_deref_instr *deref,
@@ -386,8 +395,11 @@ apply_barrier_for_modes(struct util_dynarray *copies,
 
 static void
 value_set_from_value(struct value *value, const struct value *from,
-                     unsigned write_mask)
+                     unsigned base_index, unsigned write_mask)
 {
+   /* We can't have non-zero indexes with non-trivial write masks */
+   assert(base_index == 0 || write_mask == 1);
+
    if (from->is_ssa) {
       /* Clear value if it was being used as non-SSA. */
       if (!value->is_ssa)
@@ -396,8 +408,8 @@ value_set_from_value(struct value *value, const struct value *from,
       /* Only overwrite the written components */
       for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
          if (write_mask & (1 << i)) {
-            value->ssa.def[i] = from->ssa.def[i];
-            value->ssa.component[i] = from->ssa.component[i];
+            value->ssa.def[base_index + i] = from->ssa.def[i];
+            value->ssa.component[base_index + i] = from->ssa.component[i];
          }
       }
    } else {
@@ -407,6 +419,42 @@ value_set_from_value(struct value *value, const struct value *from,
    }
 }
 
+/* Try to load a single element of a vector from the copy_entry.  If the data
+ * isn't available, just let the original intrinsic do the work.
+ */
+static bool
+load_element_from_ssa_entry_value(struct copy_prop_var_state *state,
+                                  struct copy_entry *entry,
+                                  nir_builder *b, nir_intrinsic_instr *intrin,
+                                  struct value *value, unsigned index)
+{
+   const struct glsl_type *type = entry->dst->type;
+   unsigned num_components = glsl_get_vector_elements(type);
+   assert(index < num_components);
+
+   /* We don't have the element available, so let the instruction do the work. */
+   if (!entry->src.ssa.def[index])
+      return false;
+
+   b->cursor = nir_instr_remove(&intrin->instr);
+   intrin->instr.block = NULL;
+
+   assert(entry->src.ssa.component[index] <
+          entry->src.ssa.def[index]->num_components);
+   nir_ssa_def *def = nir_channel(b, entry->src.ssa.def[index],
+                                  entry->src.ssa.component[index]);
+
+   *value = (struct value) {
+      .is_ssa = true,
+      .ssa = {
+         .def = { def },
+         .component = { 0 },
+      },
+   };
+
+   return true;
+}
+
 /* Do a "load" from an SSA-based entry return it in "value" as a value with a
  * single SSA def.  Because an entry could reference multiple different SSA
  * defs, a vecN operation may be inserted to combine them into a single SSA
@@ -419,8 +467,16 @@ static bool
 load_from_ssa_entry_value(struct copy_prop_var_state *state,
                           struct copy_entry *entry,
                           nir_builder *b, nir_intrinsic_instr *intrin,
-                          struct value *value)
+                          nir_deref_instr *src, struct value *value)
 {
+   if (is_array_deref_of_vector(src)) {
+      if (!nir_src_is_const(src->arr.index))
+         return false;
+
+      return load_element_from_ssa_entry_value(state, entry, b, intrin, value,
+                                               nir_src_as_uint(src->arr.index));
+   }
+
    *value = entry->src;
    assert(value->is_ssa);
 
@@ -611,7 +667,7 @@ try_load_from_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
       return false;
 
    if (entry->src.is_ssa) {
-      return load_from_ssa_entry_value(state, entry, b, intrin, value);
+      return load_from_ssa_entry_value(state, entry, b, intrin, src, value);
    } else {
       return load_from_deref_entry_value(state, entry, b, intrin, src, value);
    }
@@ -639,15 +695,6 @@ invalidate_copies_for_cf_node(struct copy_prop_var_state *state,
    }
 }
 
-static bool
-is_array_deref_of_vector(nir_deref_instr *deref)
-{
-   if (deref->deref_type != nir_deref_type_array)
-      return false;
-   nir_deref_instr *parent = nir_deref_instr_parent(deref);
-   return glsl_type_is_vector(parent->type);
-}
-
 static void
 print_value(struct value *value, unsigned num_components)
 {
@@ -756,9 +803,25 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
 
          nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
 
+         int vec_index = 0;
+         nir_deref_instr *vec_src = src;
         if (is_array_deref_of_vector(src)) {
-            /* Not handled yet.  This load won't invalidate existing copies. */
-            break;
+            vec_src = nir_deref_instr_parent(src);
+            unsigned vec_comps = glsl_get_vector_elements(vec_src->type);
+
+            /* Indirects are not handled yet. */
+            if (!nir_src_is_const(src->arr.index))
+               break;
+
+            vec_index = nir_src_as_uint(src->arr.index);
+
+            /* Loading from an invalid index yields an undef */
+            if (vec_index >= vec_comps) {
+               b->cursor = nir_instr_remove(instr);
+               nir_ssa_def *u = nir_ssa_undef(b, 1, intrin->dest.ssa.bit_size);
+               nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(u));
+               break;
+            }
          }
 
          struct copy_entry *src_entry =
@@ -768,7 +831,8 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
          if (value.is_ssa) {
             /* lookup_load has already ensured that we get a single SSA
              * value that has all of the channels.  We just have to do the
-             * rewrite operation.
+             * rewrite operation.  Note for array derefs of vectors, the
+             * channel 0 is used.
             */
            if (intrin->instr.block) {
               /* The lookup left our instruction in-place.  This means it
@@ -804,14 +868,14 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
          * contains what we're looking for.
          */
         struct copy_entry *entry =
-           lookup_entry_for_deref(copies, src, nir_derefs_equal_bit);
+           lookup_entry_for_deref(copies, vec_src, nir_derefs_equal_bit);
         if (!entry)
-           entry = copy_entry_create(copies, src);
+           entry = copy_entry_create(copies, vec_src);
 
         /* Update the entry with the value of the load.  This way
          * we can potentially remove subsequent loads.
          */
-        value_set_from_value(&entry->src, &value,
+        value_set_from_value(&entry->src, &value, vec_index,
                              (1 << intrin->num_components) - 1);
         break;
      }
@@ -820,6 +884,29 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
         if (debug) dump_instr(instr);
 
         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+        assert(glsl_type_is_vector_or_scalar(dst->type));
+
+        int vec_index = 0;
+        nir_deref_instr *vec_dst = dst;
+        if (is_array_deref_of_vector(dst)) {
+           vec_dst = nir_deref_instr_parent(dst);
+           unsigned vec_comps = glsl_get_vector_elements(vec_dst->type);
+
+           /* Indirects are not handled yet.  Kill everything */
+           if (!nir_src_is_const(dst->arr.index)) {
+              kill_aliases(copies, vec_dst, (1 << vec_comps) - 1);
+              break;
+           }
+
+           vec_index = nir_src_as_uint(dst->arr.index);
+
+           /* Storing to an invalid index is a no-op. */
+           if (vec_index >= vec_comps) {
+              nir_instr_remove(instr);
+              break;
+           }
+        }
+
         struct copy_entry *entry =
            lookup_entry_for_deref(copies, dst, nir_derefs_equal_bit);
         if (entry && value_equals_store_src(&entry->src, intrin)) {
@@ -827,21 +914,14 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
            * store is redundant so remove it.
            */
           nir_instr_remove(instr);
-        } else if (is_array_deref_of_vector(dst)) {
-           /* Not handled yet.  Writing into an element of 'dst' invalidates
-            * any related entries in copies.
-            */
-           nir_deref_instr *vector = nir_deref_instr_parent(dst);
-           unsigned vector_components = glsl_get_vector_elements(vector->type);
-           kill_aliases(copies, vector, (1 << vector_components) - 1);
        } else {
           struct value value = {0};
           value_set_ssa_components(&value, intrin->src[1].ssa,
                                    intrin->num_components);
           unsigned wrmask = nir_intrinsic_write_mask(intrin);
           struct copy_entry *entry =
-             get_entry_and_kill_aliases(copies, dst, wrmask);
-          value_set_from_value(&entry->src, &value, wrmask);
+             get_entry_and_kill_aliases(copies, vec_dst, wrmask);
+          value_set_from_value(&entry->src, &value, vec_index, wrmask);
        }
 
        break;
@@ -903,7 +983,7 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
 
          struct copy_entry *dst_entry =
             get_entry_and_kill_aliases(copies, dst, full_mask);
-         value_set_from_value(&dst_entry->src, &value, full_mask);
+         value_set_from_value(&dst_entry->src, &value, 0, full_mask);
          break;
       }
 
diff --git a/src/compiler/nir/tests/vars_tests.cpp b/src/compiler/nir/tests/vars_tests.cpp
index e86b06dc4a9..03177a465d2 100644
--- a/src/compiler/nir/tests/vars_tests.cpp
+++ b/src/compiler/nir/tests/vars_tests.cpp
@@ -461,7 +461,7 @@ TEST_F(nir_copy_prop_vars_test, simple_store_load_in_two_blocks)
    }
 }
 
-TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_reuses_previous_load)
+TEST_F(nir_copy_prop_vars_test, load_direct_array_deref_on_vector_reuses_previous_load)
 {
    nir_variable *in0 = create_ivec2(nir_var_mem_ssbo, "in0");
    nir_variable *in1 = create_ivec2(nir_var_mem_ssbo, "in1");
@@ -497,7 +497,7 @@ TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_reuse
    ASSERT_TRUE(nir_src_as_alu_instr(&store->src[1]));
 }
 
-TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_reuses_previous_copy)
+TEST_F(nir_copy_prop_vars_test, load_direct_array_deref_on_vector_reuses_previous_copy)
 {
    nir_variable *in0 = create_ivec2(nir_var_mem_ssbo, "in0");
    nir_variable *vec = create_ivec2(nir_var_mem_ssbo, "vec");
@@ -521,7 +521,7 @@ TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_reuse
    ASSERT_EQ(nir_intrinsic_get_var(load, 0), in0);
 }
 
-TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_gets_reused)
+TEST_F(nir_copy_prop_vars_test, load_direct_array_deref_on_vector_gets_reused)
 {
    nir_variable *in0 = create_ivec2(nir_var_mem_ssbo, "in0");
    nir_variable *vec = create_ivec2(nir_var_mem_ssbo, "vec");
@@ -555,7 +555,7 @@ TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_gets_
    ASSERT_TRUE(nir_src_as_alu_instr(&store->src[1]));
 }
 
-TEST_F(nir_copy_prop_vars_test, DISABLED_store_load_direct_array_deref_on_vector)
+TEST_F(nir_copy_prop_vars_test, store_load_direct_array_deref_on_vector)
 {
    nir_variable *vec = create_ivec2(nir_var_mem_ssbo, "vec");
    nir_variable *out0 = create_int(nir_var_mem_ssbo, "out0");
-- 
2.30.2
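
Illustration (not part of the patch): a minimal sketch, in plain C, of the
policy the pass now applies to a direct array deref of a vector, matching
the v2/v3 notes in the commit message.  The names (element_action,
classify) are invented for this example and do not appear in the real
code.

#include <stdbool.h>
#include <stdio.h>

/* What copy_prop_vars does with a load/store of v[index] for a vector v
 * with 'comps' components. */
enum element_action {
   FOLD_INTO_ENTRY,    /* constant, in-bounds index: track/reuse the element */
   KEEP_INTRINSIC,     /* indirect index: leave the intrinsic alone (stores
                        * additionally kill aliasing copy entries) */
   STORE_IS_NOOP,      /* constant out-of-bounds store: remove it */
   LOAD_YIELDS_UNDEF   /* constant out-of-bounds load: rewrite uses to undef */
};

static enum element_action
classify(bool is_store, bool index_is_const, unsigned index, unsigned comps)
{
   if (!index_is_const)
      return KEEP_INTRINSIC;
   if (index >= comps)
      return is_store ? STORE_IS_NOOP : LOAD_YIELDS_UNDEF;
   return FOLD_INTO_ENTRY;
}

int
main(void)
{
   printf("%d\n", classify(true, true, 5, 2));   /* ivec2 v; v[5] = x; -> store removed */
   printf("%d\n", classify(false, false, 0, 2)); /* ivec2 v; y = v[i]; -> load kept */
   printf("%d\n", classify(false, true, 1, 2));  /* ivec2 v; y = v[1]; -> folded */
   return 0;
}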