* due to array access heuristics.
*/
static bool
-force_unroll_array_access(loop_info_state *state, nir_shader *ns,
- nir_deref_var *variable)
+force_unroll_array_access_var(loop_info_state *state, nir_shader *ns,
+ nir_deref_var *variable)
{
nir_deref *tail = &variable->deref;
return false;
}
+static bool
+force_unroll_array_access(loop_info_state *state, nir_shader *ns,
+ nir_deref_instr *deref)
+{
+ for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+ if (d->deref_type != nir_deref_type_array)
+ continue;
+
+ assert(d->arr.index.is_ssa);
+ nir_loop_variable *array_index = get_loop_var(d->arr.index.ssa, state);
+
+ if (array_index->type != basic_induction)
+ continue;
+
+ nir_deref_instr *parent = nir_deref_instr_parent(d);
+ assert(glsl_type_is_array(parent->type) ||
+ glsl_type_is_matrix(parent->type));
+ if (glsl_get_length(parent->type) == state->loop->info->trip_count) {
+ state->loop->info->force_unroll = true;
+ return true;
+ }
+
+ if (deref->mode & state->indirect_mask) {
+ state->loop->info->force_unroll = true;
+ return true;
+ }
+ }
+
+ return false;
+}
+
static bool
force_unroll_heuristics(loop_info_state *state, nir_shader *ns,
nir_block *block)
unsigned num_vars =
nir_intrinsic_infos[intrin->intrinsic].num_variables;
for (unsigned i = 0; i < num_vars; i++) {
- if (force_unroll_array_access(state, ns, intrin->variables[i]))
+ if (force_unroll_array_access_var(state, ns, intrin->variables[i]))
return true;
}
}
+
+ if (intrin->intrinsic == nir_intrinsic_load_deref ||
+ intrin->intrinsic == nir_intrinsic_store_deref ||
+ intrin->intrinsic == nir_intrinsic_copy_deref) {
+ if (force_unroll_array_access(state, ns,
+ nir_src_as_deref(intrin->src[0])))
+ return true;
+
+ if (intrin->intrinsic == nir_intrinsic_copy_deref &&
+ force_unroll_array_access(state, ns,
+ nir_src_as_deref(intrin->src[1])))
+ return true;
+ }
}
return false;
nir_loop_analyze_impl(nir_function_impl *impl,
nir_variable_mode indirect_mask)
{
- nir_assert_lowered_derefs(impl->function->shader, nir_lower_load_store_derefs);
nir_index_ssa_defs(impl);
foreach_list_typed(nir_cf_node, node, node, &impl->body)
process_loops(node, indirect_mask);