From e33440070a54cd3e67953ee8410c0edb62643c47 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 3 Jul 2016 17:11:07 +0200 Subject: [PATCH] glsl/lower_if: conditionally lower if-branches based on their size MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/compiler/glsl/ir_optimization.h | 2 +- src/compiler/glsl/lower_if_to_cond_assign.cpp | 55 +++++++++++++++++-- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index e6e8318a3aa..0d6c4e6a66a 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -109,7 +109,7 @@ bool do_if_simplification(exec_list *instructions); bool opt_flatten_nested_if_blocks(exec_list *instructions); bool do_discard_simplification(exec_list *instructions); bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions, - unsigned max_depth = 0); + unsigned max_depth = 0, unsigned min_branch_cost = 0); bool do_mat_op_to_vec(exec_list *instructions); bool do_minmax_prune(exec_list *instructions); bool do_noop_swizzle(exec_list *instructions); diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp index e8db7aa9b7f..ae048be0d27 100644 --- a/src/compiler/glsl/lower_if_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp @@ -24,8 +24,14 @@ /** * \file lower_if_to_cond_assign.cpp * - * This attempts to flatten if-statements to conditional assignments for - * GPUs with limited or no flow control support. + * This flattens if-statements to conditional assignments if: + * + * - the GPU has limited or no flow control support + * (controlled by max_depth) + * + * - small conditional branches are more expensive than conditional assignments + * (controlled by min_branch_cost, that's the cost for a branch to be + * preserved) * * It can't handle other control flow being inside of its block, such * as calls or loops. Hopefully loop unrolling and inlining will take @@ -49,17 +55,20 @@ #include "ir.h" #include "util/set.h" #include "util/hash_table.h" /* Needed for the hashing functions */ +#include "main/macros.h" /* for MAX2 */ namespace { class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor { public: ir_if_to_cond_assign_visitor(gl_shader_stage stage, - unsigned max_depth) + unsigned max_depth, + unsigned min_branch_cost) { this->progress = false; this->stage = stage; this->max_depth = max_depth; + this->min_branch_cost = min_branch_cost; this->depth = 0; this->condition_variables = @@ -76,8 +85,13 @@ public: ir_visitor_status visit_leave(ir_if *); bool found_unsupported_op; + bool found_expensive_op; + bool is_then; bool progress; gl_shader_stage stage; + unsigned then_cost; + unsigned else_cost; + unsigned min_branch_cost; unsigned max_depth; unsigned depth; @@ -88,12 +102,12 @@ public: bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions, - unsigned max_depth) + unsigned max_depth, unsigned min_branch_cost) { if (max_depth == UINT_MAX) return false; - ir_if_to_cond_assign_visitor v(stage, max_depth); + ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost); visit_list_elements(&v, instructions); @@ -129,6 +143,20 @@ check_ir_node(ir_instruction *ir, void *data) break; } + /* SSBO, images, atomic counters are handled by ir_type_call */ + case ir_type_texture: + v->found_expensive_op = true; + break; + + case ir_type_expression: + case ir_type_dereference_array: + case ir_type_dereference_record: + if (v->is_then) + v->then_cost++; + else + v->else_cost++; + break; + default: break; } @@ -193,24 +221,39 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir) ir_visitor_status ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) { + bool must_lower = this->depth-- > this->max_depth; + /* Only flatten when beyond the GPU's maximum supported nesting depth. */ - if (this->depth-- <= this->max_depth) + if (!must_lower && this->min_branch_cost == 0) return visit_continue; this->found_unsupported_op = false; + this->found_expensive_op = false; + this->then_cost = 0; + this->else_cost = 0; ir_assignment *assign; /* Check that both blocks don't contain anything we can't support. */ + this->is_then = true; foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) { visit_tree(then_ir, check_ir_node, this); } + + this->is_then = false; foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) { visit_tree(else_ir, check_ir_node, this); } + if (this->found_unsupported_op) return visit_continue; /* can't handle inner unsupported opcodes */ + /* Skip if the branch cost is high enough or if there's an expensive op. */ + if (!must_lower && + (this->found_expensive_op || + MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost)) + return visit_continue; + void *mem_ctx = ralloc_parent(ir); /* Store the condition to a variable. Move all of the instructions from -- 2.30.2