From 60177d5e2aec07ed6386a6935b118a356d58c4ec Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sat, 2 Oct 2010 22:57:17 -0700 Subject: [PATCH] glsl: Add an array splitting pass. I've had this code laying around almost done for a long time. The idea is like opt_structure_splitting, that we've got a bunch of transforms at the GLSL IR level that only understand scalars and vectors, which just skip complicated dereferences. While driver backends may manage some optimization after they split matrices up themselves, it would be better to bring all of our optimization to bear on the problem. While I wasn't expecting changes quite yet, a few programs end up winning: a gstreamer convolution shader, and the Humus dynamic branching demo: Total instructions: 269430 -> 269342 3/2148 programs affected (0.1%) 1498 -> 1410 instructions in affected programs (5.9% reduction) --- src/glsl/Makefile.sources | 1 + src/glsl/glsl_parser_extras.cpp | 1 + src/glsl/ir_optimization.h | 1 + src/glsl/opt_array_splitting.cpp | 377 +++++++++++++++++++++++++++++++ 4 files changed, 380 insertions(+) create mode 100644 src/glsl/opt_array_splitting.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 06728daf763..15f5e1f50a6 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -62,6 +62,7 @@ LIBGLSL_CXX_FILES := \ lower_vector.cpp \ lower_output_reads.cpp \ opt_algebraic.cpp \ + opt_array_splitting.cpp \ opt_constant_folding.cpp \ opt_constant_propagation.cpp \ opt_constant_variable.cpp \ diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 6547ad2d3b7..ae7a365f4b2 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -1044,6 +1044,7 @@ do_common_optimization(exec_list *ir, bool linked, progress = do_swizzle_swizzle(ir) || progress; progress = do_noop_swizzle(ir) || progress; + progress = optimize_split_arrays(ir, linked) || progress; progress = optimize_redundant_jumps(ir) || progress; loop_state *ls = analyze_loop_variables(ir); diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 085b96903cb..35678358307 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -74,6 +74,7 @@ bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); bool lower_clip_distance(exec_list *instructions); void lower_output_reads(exec_list *instructions); bool optimize_redundant_jumps(exec_list *instructions); +bool optimize_split_arrays(exec_list *instructions, bool linked); ir_rvalue * compare_index_block(exec_list *instructions, ir_variable *index, diff --git a/src/glsl/opt_array_splitting.cpp b/src/glsl/opt_array_splitting.cpp new file mode 100644 index 00000000000..1e278c16ae1 --- /dev/null +++ b/src/glsl/opt_array_splitting.cpp @@ -0,0 +1,377 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_array_splitting.cpp + * + * If an array is always dereferenced with a constant index, then + * split it apart into its elements, making it more amenable to other + * optimization passes. + * + * This skips uniform/varying arrays, which would need careful + * handling due to their ir->location fields tying them to the GL API + * and other shader stages. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "ir_print_visitor.h" +#include "glsl_types.h" + +static bool debug = false; + +namespace opt_array_splitting { + +class variable_entry : public exec_node +{ +public: + variable_entry(ir_variable *var) + { + this->var = var; + this->whole_array_access = 0; + this->declaration = false; + this->components = NULL; + this->mem_ctx = NULL; + } + + ir_variable *var; /* The key: the variable's pointer. */ + + /** Number of times the variable is referenced, including assignments. */ + unsigned whole_array_access; + + bool declaration; /* If the variable had a decl in the instruction stream */ + + ir_variable **components; + + /** ralloc_parent(this->var) -- the shader's talloc context. */ + void *mem_ctx; +}; + +} /* namespace */ +using namespace opt_array_splitting; + +/** + * This class does a walk over the tree, coming up with the set of + * variables that could be split by looking to see if they are arrays + * that are only ever constant-index dereferenced. + */ +class ir_array_reference_visitor : public ir_hierarchical_visitor { +public: + ir_array_reference_visitor(void) + { + this->mem_ctx = ralloc_context(NULL); + this->variable_list.make_empty(); + } + + ~ir_array_reference_visitor(void) + { + ralloc_free(mem_ctx); + } + + bool get_split_list(exec_list *instructions, bool linked); + + virtual ir_visitor_status visit(ir_variable *); + virtual ir_visitor_status visit(ir_dereference_variable *); + virtual ir_visitor_status visit_enter(ir_dereference_array *); + + variable_entry *get_variable_entry(ir_variable *var); + + /* List of variable_entry */ + exec_list variable_list; + + void *mem_ctx; +}; + +variable_entry * +ir_array_reference_visitor::get_variable_entry(ir_variable *var) +{ + assert(var); + + if (var->mode != ir_var_auto && + var->mode != ir_var_temporary) + return NULL; + + if (!var->type->is_array()) + return NULL; + + /* If the array hasn't been sized yet, we can't split it. After + * linking, this should be resolved. + */ + if (var->type->length == 0) + return NULL; + + foreach_iter(exec_list_iterator, iter, this->variable_list) { + variable_entry *entry = (variable_entry *)iter.get(); + if (entry->var == var) + return entry; + } + + variable_entry *entry = new(mem_ctx) variable_entry(var); + this->variable_list.push_tail(entry); + return entry; +} + + +ir_visitor_status +ir_array_reference_visitor::visit(ir_variable *ir) +{ + variable_entry *entry = this->get_variable_entry(ir); + + if (entry) + entry->declaration = true; + + return visit_continue; +} + +ir_visitor_status +ir_array_reference_visitor::visit(ir_dereference_variable *ir) +{ + variable_entry *entry = this->get_variable_entry(ir->var); + + /* If we made it to here, then the dereference of this array didn't + * have a constant index (see the visit_continue_with_parent + * below), so we can't split the variable. + */ + if (entry) + entry->whole_array_access++; + + return visit_continue; +} + +ir_visitor_status +ir_array_reference_visitor::visit_enter(ir_dereference_array *ir) +{ + ir_dereference_variable *deref = ir->array->as_dereference_variable(); + if (!deref) + return visit_continue; + + variable_entry *entry = this->get_variable_entry(deref->var); + + if (entry && !ir->array_index->as_constant()) + entry->whole_array_access++; + + return visit_continue_with_parent; +} + +bool +ir_array_reference_visitor::get_split_list(exec_list *instructions, + bool linked) +{ + visit_list_elements(this, instructions); + + /* If the shaders aren't linked yet, we can't mess with global + * declarations, which need to be matched by name across shaders. + */ + if (!linked) { + foreach_list(node, instructions) { + ir_variable *var = ((ir_instruction *)node)->as_variable(); + if (var) { + variable_entry *entry = get_variable_entry(var); + if (entry) + entry->remove(); + } + } + } + + /* Trim out variables we found that we can't split. */ + foreach_iter(exec_list_iterator, iter, variable_list) { + variable_entry *entry = (variable_entry *)iter.get(); + + if (debug) { + printf("array %s@%p: decl %d, whole_access %d\n", + entry->var->name, (void *) entry->var, entry->declaration, + entry->whole_array_access); + } + + if (!entry->declaration || entry->whole_array_access) { + entry->remove(); + } + } + + return !variable_list.is_empty(); +} + +/** This is the class that does the actual work of splitting. */ +class ir_array_splitting_visitor : public ir_rvalue_visitor { +public: + ir_array_splitting_visitor(exec_list *vars) + { + this->variable_list = vars; + } + + virtual ~ir_array_splitting_visitor() + { + } + + virtual ir_visitor_status visit_leave(ir_assignment *); + + void split_deref(ir_dereference **deref); + void handle_rvalue(ir_rvalue **rvalue); + variable_entry *get_splitting_entry(ir_variable *var); + + exec_list *variable_list; + void *mem_ctx; +}; + +variable_entry * +ir_array_splitting_visitor::get_splitting_entry(ir_variable *var) +{ + assert(var); + + if (!var->type->is_array()) + return NULL; + + foreach_iter(exec_list_iterator, iter, *this->variable_list) { + variable_entry *entry = (variable_entry *)iter.get(); + if (entry->var == var) { + return entry; + } + } + + return NULL; +} + +void +ir_array_splitting_visitor::split_deref(ir_dereference **deref) +{ + ir_dereference_array *deref_array = (*deref)->as_dereference_array(); + if (!deref_array) + return; + + ir_dereference_variable *deref_var = deref_array->array->as_dereference_variable(); + if (!deref_var) + return; + ir_variable *var = deref_var->var; + + variable_entry *entry = get_splitting_entry(var); + if (!entry) + return; + + ir_constant *constant = deref_array->array_index->as_constant(); + assert(constant); + + if (constant->value.i[0] < (int)var->type->length) { + *deref = new(entry->mem_ctx) + ir_dereference_variable(entry->components[constant->value.i[0]]); + } else { + /* There was a constant array access beyond the end of the + * array. This might have happened due to constant folding + * after the initial parse. This produces an undefined value, + * but shouldn't crash. Just give them an uninitialized + * variable. + */ + ir_variable *temp = new(entry->mem_ctx) ir_variable(deref_array->type, + "undef", + ir_var_temporary); + entry->components[0]->insert_before(temp); + *deref = new(entry->mem_ctx) ir_dereference_variable(temp); + } +} + +void +ir_array_splitting_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + + if (!deref) + return; + + split_deref(&deref); + *rvalue = deref; +} + +ir_visitor_status +ir_array_splitting_visitor::visit_leave(ir_assignment *ir) +{ + /* The normal rvalue visitor skips the LHS of assignments, but we + * need to process those just the same. + */ + ir_rvalue *lhs = ir->lhs; + + handle_rvalue(&lhs); + ir->lhs = lhs->as_dereference(); + + ir->lhs->accept(this); + + handle_rvalue(&ir->rhs); + ir->rhs->accept(this); + + if (ir->condition) { + handle_rvalue(&ir->condition); + ir->condition->accept(this); + } + + return visit_continue; +} + +bool +optimize_split_arrays(exec_list *instructions, bool linked) +{ + ir_array_reference_visitor refs; + if (!refs.get_split_list(instructions, linked)) + return false; + + void *mem_ctx = ralloc_context(NULL); + + /* Replace the decls of the arrays to be split with their split + * components. + */ + foreach_iter(exec_list_iterator, iter, refs.variable_list) { + variable_entry *entry = (variable_entry *)iter.get(); + const struct glsl_type *type = entry->var->type; + + entry->mem_ctx = ralloc_parent(entry->var); + + entry->components = ralloc_array(mem_ctx, + ir_variable *, + type->length); + + for (unsigned int i = 0; i < type->length; i++) { + const char *name = ralloc_asprintf(mem_ctx, "%s_%d", + entry->var->name, i); + + entry->components[i] = + new(entry->mem_ctx) ir_variable(type->fields.array, + name, + ir_var_temporary); + entry->var->insert_before(entry->components[i]); + } + + entry->var->remove(); + } + + ir_array_splitting_visitor split(&refs.variable_list); + visit_list_elements(&split, instructions); + + if (debug) + _mesa_print_ir(instructions, NULL); + + ralloc_free(mem_ctx); + + return true; + +} -- 2.30.2