/**
* \file lower_if_to_cond_assign.cpp
*
- * This attempts to flatten if-statements to conditional assignments for
- * GPUs with limited or no flow control support.
+ * This flattens if-statements to conditional assignments if:
+ *
+ * - the GPU has limited or no flow control support
+ * (controlled by max_depth)
+ *
+ * - small conditional branches are more expensive than conditional assignments
+ * (controlled by min_branch_cost, which is the minimum cost a branch
+ * must have in order to be preserved rather than flattened)
*
* It can't handle other control flow being inside of its block, such
* as calls or loops. Hopefully loop unrolling and inlining will take
#include "ir.h"
#include "util/set.h"
#include "util/hash_table.h" /* Needed for the hashing functions */
+#include "main/macros.h" /* for MAX2 */
namespace {
class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
public:
ir_if_to_cond_assign_visitor(gl_shader_stage stage,
- unsigned max_depth)
+ unsigned max_depth,
+ unsigned min_branch_cost)
{
this->progress = false;
this->stage = stage;
this->max_depth = max_depth;
+ this->min_branch_cost = min_branch_cost;
this->depth = 0;
this->condition_variables =
ir_visitor_status visit_leave(ir_if *);
bool found_unsupported_op;
+ bool found_expensive_op;
+ bool is_then;
bool progress;
gl_shader_stage stage;
+ unsigned then_cost;
+ unsigned else_cost;
+ unsigned min_branch_cost;
unsigned max_depth;
unsigned depth;
bool
lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
- unsigned max_depth)
+ unsigned max_depth, unsigned min_branch_cost)
{
if (max_depth == UINT_MAX)
return false;
- ir_if_to_cond_assign_visitor v(stage, max_depth);
+ ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);
visit_list_elements(&v, instructions);
break;
}
+ /* SSBO, images, atomic counters are handled by ir_type_call */
+ case ir_type_texture:
+ v->found_expensive_op = true;
+ break;
+
+ case ir_type_expression:
+ case ir_type_dereference_array:
+ case ir_type_dereference_record:
+ if (v->is_then)
+ v->then_cost++;
+ else
+ v->else_cost++;
+ break;
+
default:
break;
}
ir_visitor_status
ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
{
+ bool must_lower = this->depth-- > this->max_depth;
+
- /* Only flatten when beyond the GPU's maximum supported nesting depth. */
+ /* When branch-cost lowering is disabled (min_branch_cost == 0), only
+ * flatten when beyond the GPU's maximum supported nesting depth;
+ * otherwise fall through and weigh the branch cost below.
+ */
- if (this->depth-- <= this->max_depth)
+ if (!must_lower && this->min_branch_cost == 0)
return visit_continue;
this->found_unsupported_op = false;
+ this->found_expensive_op = false;
+ this->then_cost = 0;
+ this->else_cost = 0;
ir_assignment *assign;
/* Check that both blocks don't contain anything we can't support. */
+ this->is_then = true;
foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
visit_tree(then_ir, check_ir_node, this);
}
+
+ this->is_then = false;
foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
visit_tree(else_ir, check_ir_node, this);
}
+
if (this->found_unsupported_op)
return visit_continue; /* can't handle inner unsupported opcodes */
+ /* Skip if the branch cost is high enough or if there's an expensive op. */
+ if (!must_lower &&
+ (this->found_expensive_op ||
+ MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
+ return visit_continue;
+
void *mem_ctx = ralloc_parent(ir);
/* Store the condition to a variable. Move all of the instructions from