X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fglsl%2Flower_if_to_cond_assign.cpp;h=37f1ec8600c82e0794389b36a4ae0112ef67638f;hb=f9ab60202d48c72afa6a6f2a8c27db1e0777ed16;hp=01a73357d456103e3473958347420468b4198d6e;hpb=654e9466b5304b040374e72c9e9a524bd1268362;p=mesa.git

diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp
index 01a73357d45..37f1ec8600c 100644
--- a/src/compiler/glsl/lower_if_to_cond_assign.cpp
+++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp
@@ -24,8 +24,14 @@
 /**
  * \file lower_if_to_cond_assign.cpp
  *
- * This attempts to flatten if-statements to conditional assignments for
- * GPUs with limited or no flow control support.
+ * This flattens if-statements to conditional assignments if:
+ *
+ * - the GPU has limited or no flow control support
+ *   (controlled by max_depth)
+ *
+ * - small conditional branches are more expensive than conditional assignments
+ *   (controlled by min_branch_cost, the minimum cost for a branch to be
+ *   preserved)
  *
  * It can't handle other control flow being inside of its block, such
  * as calls or loops. Hopefully loop unrolling and inlining will take
@@ -49,15 +55,20 @@
 #include "ir.h"
 #include "util/set.h"
 #include "util/hash_table.h" /* Needed for the hashing functions */
+#include "main/macros.h" /* for MAX2 */
 
 namespace {
 
 class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
 public:
-   ir_if_to_cond_assign_visitor(unsigned max_depth)
+   ir_if_to_cond_assign_visitor(gl_shader_stage stage,
+                                unsigned max_depth,
+                                unsigned min_branch_cost)
    {
       this->progress = false;
+      this->stage = stage;
       this->max_depth = max_depth;
+      this->min_branch_cost = min_branch_cost;
       this->depth = 0;
 
       this->condition_variables =
@@ -74,7 +85,14 @@ public:
    ir_visitor_status visit_leave(ir_if *);
 
    bool found_unsupported_op;
+   bool found_expensive_op;
+   bool found_dynamic_arrayref;
+   bool is_then;
    bool progress;
+   gl_shader_stage stage;
+   unsigned then_cost;
+   unsigned else_cost;
+   unsigned min_branch_cost;
    unsigned max_depth;
    unsigned depth;
 
@@ -84,12 +102,13 @@ public:
 } /* anonymous namespace */
 
 bool
-lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth)
+lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
+                        unsigned max_depth, unsigned min_branch_cost)
 {
    if (max_depth == UINT_MAX)
       return false;
 
-   ir_if_to_cond_assign_visitor v(max_depth);
+   ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);
 
    visit_list_elements(&v, instructions);
 
@@ -112,6 +131,38 @@ check_ir_node(ir_instruction *ir, void *data)
    case ir_type_barrier:
      v->found_unsupported_op = true;
      break;
+
+   case ir_type_dereference_variable: {
+      ir_variable *var = ir->as_dereference_variable()->variable_referenced();
+
+      /* Lowering branches with TCS output accesses breaks many piglit tests,
+       * so don't touch them for now.
+       */
+      if (v->stage == MESA_SHADER_TESS_CTRL &&
+          var->data.mode == ir_var_shader_out)
+         v->found_unsupported_op = true;
+      break;
+   }
+
+   /* SSBO, images, atomic counters are handled by ir_type_call */
+   case ir_type_texture:
+      v->found_expensive_op = true;
+      break;
+
+   case ir_type_dereference_array: {
+      ir_dereference_array *deref = ir->as_dereference_array();
+
+      if (deref->array_index->ir_type != ir_type_constant)
+         v->found_dynamic_arrayref = true;
+   } /* fall-through */
+   case ir_type_expression:
+   case ir_type_dereference_record:
+      if (v->is_then)
+         v->then_cost++;
+      else
+         v->else_cost++;
+      break;
+
    default:
      break;
    }
@@ -176,24 +227,48 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
 ir_visitor_status
 ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
 {
+   bool must_lower = this->depth-- > this->max_depth;
+
    /* Only flatten when beyond the GPU's maximum supported nesting depth. */
-   if (this->depth-- <= this->max_depth)
+   if (!must_lower && this->min_branch_cost == 0)
       return visit_continue;
 
    this->found_unsupported_op = false;
+   this->found_expensive_op = false;
+   this->found_dynamic_arrayref = false;
+   this->then_cost = 0;
+   this->else_cost = 0;
 
    ir_assignment *assign;
 
    /* Check that both blocks don't contain anything we can't support. */
+   this->is_then = true;
   foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
      visit_tree(then_ir, check_ir_node, this);
   }
+
+   this->is_then = false;
   foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
      visit_tree(else_ir, check_ir_node, this);
   }
+
   if (this->found_unsupported_op)
      return visit_continue; /* can't handle inner unsupported opcodes */
 
+   /* Skip if the branch cost is high enough or if there's an expensive op.
+    *
+    * Also skip if non-constant array indices were encountered, since those
+    * can be out-of-bounds for a not-taken branch, and so generating an
+    * assignment would be incorrect. In the case of must_lower, it's up to the
+    * backend to deal with any potential fall-out (perhaps by translating the
+    * assignments to hardware-predicated moves).
+    */
+   if (!must_lower &&
+       (this->found_expensive_op ||
+        this->found_dynamic_arrayref ||
+        MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
+      return visit_continue;
+
   void *mem_ctx = ralloc_parent(ir);
 
   /* Store the condition to a variable. Move all of the instructions from
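
The new bail-out logic added to visit_leave() above reduces to a small predicate over what check_ir_node() collects. Below is a minimal standalone sketch of that predicate, not Mesa code: branch_info and should_flatten are invented names, the found_unsupported_op bail-out is left out for brevity, and the cost model assumed is the one the patch uses (each expression or dereference counts as 1, textures count as expensive, non-constant array indices are flagged as unsafe).

#include <algorithm>
#include <cstdio>

/* Invented for illustration only; these types do not exist in Mesa. */
struct branch_info {
   unsigned depth;              /* nesting depth of the if-statement */
   unsigned then_cost;          /* expressions/dereferences counted in the then-block */
   unsigned else_cost;          /* expressions/dereferences counted in the else-block */
   bool found_expensive_op;     /* e.g. a texture fetch */
   bool found_dynamic_arrayref; /* a non-constant array index */
};

static bool
should_flatten(const branch_info &b, unsigned max_depth, unsigned min_branch_cost)
{
   /* Nesting deeper than the hardware supports always forces lowering. */
   bool must_lower = b.depth > max_depth;

   /* With a zero cost threshold, only excessive depth can trigger lowering. */
   if (!must_lower && min_branch_cost == 0)
      return false;

   /* Keep the branch if it is expensive enough, contains an expensive op,
    * or contains a dynamic array reference (unsafe to run unconditionally). */
   if (!must_lower &&
       (b.found_expensive_op || b.found_dynamic_arrayref ||
        std::max(b.then_cost, b.else_cost) >= min_branch_cost))
      return false;

   return true;
}

int main()
{
   branch_info small_branch   = {1, 2, 1, false, false};
   branch_info texture_branch = {1, 2, 1, true,  false};

   printf("small branch lowered:   %d\n", should_flatten(small_branch, 16, 4));   /* 1 */
   printf("texture branch lowered: %d\n", should_flatten(texture_branch, 16, 4)); /* 0 */
   return 0;
}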
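
The trailing context line ("Store the condition to a variable. Move all of the instructions from ...") is where the actual rewrite happens. As a rough, self-contained illustration in plain C++ rather than GLSL IR (rhs, if_condition and evaluations are invented names, not from the patch), this is what the generated conditional assignments behave like, and why the patch adds the expensive-op and dynamic-index safeguards: once flattened, both blocks execute unconditionally.

#include <cstdio>

static int evaluations = 0;

/* rhs() stands in for an arbitrary right-hand side; the counter shows how
 * often it runs. */
static float rhs(float v)
{
   evaluations++;
   return v;
}

int main()
{
   bool cond = false;
   float x = 0.0f;

   /* Roughly what the lowered IR amounts to: the condition is stored once,
    * then every assignment from the then/else blocks becomes a write
    * predicated on it.  Both right-hand sides are evaluated regardless of
    * cond, which is why texture fetches and non-constant array indices now
    * make the pass keep the branch unless lowering is mandatory. */
   bool if_condition = cond;
   { float tmp = rhs(1.0f); if (if_condition)  x = tmp; }  /* from the then-block */
   { float tmp = rhs(2.0f); if (!if_condition) x = tmp; }  /* from the else-block */

   printf("x = %f, right-hand sides evaluated %d times\n", x, evaluations);
   return 0;
}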