From e974781301601f411c376f9a7f5574adf9f0238a Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sun, 2 Mar 2014 08:59:50 -0800 Subject: [PATCH] glsl: Optimize (v.x + v.y) + (v.z + v.w) into dot(v, 1.0). Cuts five instructions out of SynMark's Gl32VSInstancing benchmark. --- src/glsl/opt_algebraic.cpp | 46 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index d57c3e8bde4..448af6b505b 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -119,6 +119,44 @@ update_type(ir_expression *ir) ir->type = ir->operands[1]->type; } +/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ +static ir_expression * +try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx) +{ + if (expr0 && expr0->operation == ir_binop_add && + expr1 && expr1->operation == ir_binop_add) { + ir_swizzle *x = expr0->operands[0]->as_swizzle(); + ir_swizzle *y = expr0->operands[1]->as_swizzle(); + ir_swizzle *z = expr1->operands[0]->as_swizzle(); + ir_swizzle *w = expr1->operands[1]->as_swizzle(); + + if (!x || x->mask.num_components != 1 || + !y || y->mask.num_components != 1 || + !z || z->mask.num_components != 1 || + !w || w->mask.num_components != 1) { + return NULL; + } + + bool swiz_seen[4] = {false, false, false, false}; + swiz_seen[x->mask.x] = true; + swiz_seen[y->mask.x] = true; + swiz_seen[z->mask.x] = true; + swiz_seen[w->mask.x] = true; + + if (!swiz_seen[0] || !swiz_seen[1] || + !swiz_seen[2] || !swiz_seen[3]) { + return NULL; + } + + if (x->val->equals(y->val) && + x->val->equals(z->val) && + x->val->equals(w->val)) { + return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4)); + } + } + return NULL; +} + void ir_algebraic_visitor::reassociate_operands(ir_expression *ir1, int op1, @@ -332,6 +370,14 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) if (op_const[1] && !op_const[0]) reassociate_constant(ir, 1, op_const[1], op_expr[0]); + /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ + if (options->OptimizeForAOS) { + ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1], + mem_ctx); + if (expr) + return expr; + } + /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a). * * (-x + y) * a + x -- 2.30.2