From e413d3f15ca72b82ca29e43f010597a83427d5e8 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 17 Apr 2013 17:30:25 -0700 Subject: [PATCH] glsl: Add a pass to flip matrix/vector multiplies to use dot products. This pass flips (matrix * vector) operations to (vector * matrixTranspose) for certain built-in matrices (currently gl_ModelViewProjectionMatrix and gl_TextureMatrix). This is equivalent, but results in dot products rather than multiplies and adds. On some hardware, this is more efficient. This pass is conditionalized on ctx->mvp_with_dp4, the flag drivers set to indicate they prefer dot products. Improves performance in Lightsmark by 1.01131% +/- 0.162069% (n = 10) on a Haswell GT2 system. Passes Piglit on Ivybridge. v2: Use struct gl_shader_compiler_options instead of plumbing through another boolean flag for this purpose. Signed-off-by: Kenneth Graunke Reviewed-by: Ian Romanick Reviewed-by: Eric Anholt --- src/glsl/Makefile.sources | 1 + src/glsl/glsl_parser_extras.cpp | 4 ++ src/glsl/ir_optimization.h | 1 + src/glsl/opt_flip_matrices.cpp | 122 ++++++++++++++++++++++++++++++++ 4 files changed, 128 insertions(+) create mode 100644 src/glsl/opt_flip_matrices.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 674a05f5a32..df03cf806c4 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -82,6 +82,7 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/opt_dead_code_local.cpp \ $(GLSL_SRCDIR)/opt_dead_functions.cpp \ $(GLSL_SRCDIR)/opt_flatten_nested_if_blocks.cpp \ + $(GLSL_SRCDIR)/opt_flip_matrices.cpp \ $(GLSL_SRCDIR)/opt_function_inlining.cpp \ $(GLSL_SRCDIR)/opt_if_simplification.cpp \ $(GLSL_SRCDIR)/opt_noop_swizzle.cpp \ diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 4e496a1749d..a6e6dde8564 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -1224,6 +1224,10 @@ do_common_optimization(exec_list *ir, bool linked, progress = opt_flatten_nested_if_blocks(ir) || progress; progress = do_copy_propagation(ir) || progress; progress = do_copy_propagation_elements(ir) || progress; + + if (options->PreferDP4 && !linked) + progress = opt_flip_matrices(ir) || progress; + if (linked) progress = do_dead_code(ir, uniform_locations_assigned) || progress; else diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 9d28e9166e9..28093cfe1a1 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -80,6 +80,7 @@ bool do_dead_code(exec_list *instructions, bool uniform_locations_assigned); bool do_dead_code_local(exec_list *instructions); bool do_dead_code_unlinked(exec_list *instructions); bool do_dead_functions(exec_list *instructions); +bool opt_flip_matrices(exec_list *instructions); bool do_function_inlining(exec_list *instructions); bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false); bool do_lower_texture_projection(exec_list *instructions); diff --git a/src/glsl/opt_flip_matrices.cpp b/src/glsl/opt_flip_matrices.cpp new file mode 100644 index 00000000000..497513fe82e --- /dev/null +++ b/src/glsl/opt_flip_matrices.cpp @@ -0,0 +1,122 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_flip_matrices.cpp + * + * Convert (matrix * vector) operations to (vector * matrixTranspose), + * which can be done using dot products rather than multiplies and adds. + * On some hardware, this is more efficient. + * + * This currently only does the conversion for built-in matrices which + * already have transposed equivalents. Namely, gl_ModelViewProjectionMatrix + * and gl_TextureMatrix. + */ +#include "ir.h" +#include "ir_optimization.h" +#include "main/macros.h" + +namespace { +class matrix_flipper : public ir_hierarchical_visitor { +public: + matrix_flipper(exec_list *instructions) + { + progress = false; + mvp_transpose = NULL; + texmat_transpose = NULL; + + foreach_list(n, instructions) { + ir_instruction *ir = (ir_instruction *) n; + ir_variable *var = ir->as_variable(); + if (!var) + continue; + if (strcmp(var->name, "gl_ModelViewProjectionMatrixTranspose") == 0) + mvp_transpose = var; + if (strcmp(var->name, "gl_TextureMatrixTranspose") == 0) + texmat_transpose = var; + } + } + + ir_visitor_status visit_enter(ir_expression *ir); + + bool progress; + +private: + ir_variable *mvp_transpose; + ir_variable *texmat_transpose; +}; +} + +ir_visitor_status +matrix_flipper::visit_enter(ir_expression *ir) +{ + if (ir->operation != ir_binop_mul || + !ir->operands[0]->type->is_matrix() || + !ir->operands[1]->type->is_vector()) + return visit_continue; + + ir_variable *mat_var = ir->operands[0]->variable_referenced(); + if (!mat_var) + return visit_continue; + + if (mvp_transpose && + strcmp(mat_var->name, "gl_ModelViewProjectionMatrix") == 0) { + ir_dereference_variable *deref = ir->operands[0]->as_dereference_variable(); + assert(deref && deref->var == mat_var); + + void *mem_ctx = ralloc_parent(ir); + + ir->operands[0] = ir->operands[1]; + ir->operands[1] = new(mem_ctx) ir_dereference_variable(mvp_transpose); + + progress = true; + } else if (texmat_transpose && + strcmp(mat_var->name, "gl_TextureMatrix") == 0) { + ir_dereference_array *array_ref = ir->operands[0]->as_dereference_array(); + assert(array_ref != NULL); + ir_dereference_variable *var_ref = array_ref->array->as_dereference_variable(); + assert(var_ref && var_ref->var == mat_var); + + ir->operands[0] = ir->operands[1]; + ir->operands[1] = array_ref; + + var_ref->var = texmat_transpose; + + texmat_transpose->max_array_access = + MAX2(texmat_transpose->max_array_access, mat_var->max_array_access); + + progress = true; + } + + return visit_continue; +} + +bool +opt_flip_matrices(struct exec_list *instructions) +{ + matrix_flipper v(instructions); + + visit_list_elements(&v, instructions); + + return v.progress; +} -- 2.30.2