glsl: Recognize mul(b2f(a), b2f(b)) as a logical AND.
[mesa.git] / src / glsl / opt_algebraic.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file opt_algebraic.cpp
26 *
27 * Takes advantage of association, commutativity, and other algebraic
28 * properties to simplify expressions.
29 */
30
31 #include "ir.h"
32 #include "ir_visitor.h"
33 #include "ir_rvalue_visitor.h"
34 #include "ir_optimization.h"
35 #include "ir_builder.h"
36 #include "glsl_types.h"
37
38 using namespace ir_builder;
39
40 namespace {
41
42 /**
43 * Visitor class for replacing expressions with ir_constant values.
44 */
45
46 class ir_algebraic_visitor : public ir_rvalue_visitor {
47 public:
48 ir_algebraic_visitor(bool native_integers,
49 const struct gl_shader_compiler_options *options)
50 : options(options)
51 {
52 this->progress = false;
53 this->mem_ctx = NULL;
54 this->native_integers = native_integers;
55 }
56
57 virtual ~ir_algebraic_visitor()
58 {
59 }
60
61 ir_rvalue *handle_expression(ir_expression *ir);
62 void handle_rvalue(ir_rvalue **rvalue);
63 bool reassociate_constant(ir_expression *ir1,
64 int const_index,
65 ir_constant *constant,
66 ir_expression *ir2);
67 void reassociate_operands(ir_expression *ir1,
68 int op1,
69 ir_expression *ir2,
70 int op2);
71 ir_rvalue *swizzle_if_required(ir_expression *expr,
72 ir_rvalue *operand);
73
74 const struct gl_shader_compiler_options *options;
75 void *mem_ctx;
76
77 bool native_integers;
78 bool progress;
79 };
80
81 } /* unnamed namespace */
82
83 static inline bool
84 is_vec_zero(ir_constant *ir)
85 {
86 return (ir == NULL) ? false : ir->is_zero();
87 }
88
89 static inline bool
90 is_vec_one(ir_constant *ir)
91 {
92 return (ir == NULL) ? false : ir->is_one();
93 }
94
95 static inline bool
96 is_vec_two(ir_constant *ir)
97 {
98 return (ir == NULL) ? false : ir->is_value(2.0, 2);
99 }
100
101 static inline bool
102 is_vec_negative_one(ir_constant *ir)
103 {
104 return (ir == NULL) ? false : ir->is_negative_one();
105 }
106
107 static inline bool
108 is_valid_vec_const(ir_constant *ir)
109 {
110 if (ir == NULL)
111 return false;
112
113 if (!ir->type->is_scalar() && !ir->type->is_vector())
114 return false;
115
116 return true;
117 }
118
119 static inline bool
120 is_less_than_one(ir_constant *ir)
121 {
122 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
123
124 if (!is_valid_vec_const(ir))
125 return false;
126
127 unsigned component = 0;
128 for (int c = 0; c < ir->type->vector_elements; c++) {
129 if (ir->get_float_component(c) < 1.0f)
130 component++;
131 }
132
133 return (component == ir->type->vector_elements);
134 }
135
136 static inline bool
137 is_greater_than_zero(ir_constant *ir)
138 {
139 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
140
141 if (!is_valid_vec_const(ir))
142 return false;
143
144 unsigned component = 0;
145 for (int c = 0; c < ir->type->vector_elements; c++) {
146 if (ir->get_float_component(c) > 0.0f)
147 component++;
148 }
149
150 return (component == ir->type->vector_elements);
151 }
152
153 static void
154 update_type(ir_expression *ir)
155 {
156 if (ir->operands[0]->type->is_vector())
157 ir->type = ir->operands[0]->type;
158 else
159 ir->type = ir->operands[1]->type;
160 }
161
162 /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
163 static ir_expression *
164 try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
165 {
166 if (expr0 && expr0->operation == ir_binop_add &&
167 expr0->type->is_float() &&
168 expr1 && expr1->operation == ir_binop_add &&
169 expr1->type->is_float()) {
170 ir_swizzle *x = expr0->operands[0]->as_swizzle();
171 ir_swizzle *y = expr0->operands[1]->as_swizzle();
172 ir_swizzle *z = expr1->operands[0]->as_swizzle();
173 ir_swizzle *w = expr1->operands[1]->as_swizzle();
174
175 if (!x || x->mask.num_components != 1 ||
176 !y || y->mask.num_components != 1 ||
177 !z || z->mask.num_components != 1 ||
178 !w || w->mask.num_components != 1) {
179 return NULL;
180 }
181
182 bool swiz_seen[4] = {false, false, false, false};
183 swiz_seen[x->mask.x] = true;
184 swiz_seen[y->mask.x] = true;
185 swiz_seen[z->mask.x] = true;
186 swiz_seen[w->mask.x] = true;
187
188 if (!swiz_seen[0] || !swiz_seen[1] ||
189 !swiz_seen[2] || !swiz_seen[3]) {
190 return NULL;
191 }
192
193 if (x->val->equals(y->val) &&
194 x->val->equals(z->val) &&
195 x->val->equals(w->val)) {
196 return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4));
197 }
198 }
199 return NULL;
200 }
201
202 void
203 ir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
204 int op1,
205 ir_expression *ir2,
206 int op2)
207 {
208 ir_rvalue *temp = ir2->operands[op2];
209 ir2->operands[op2] = ir1->operands[op1];
210 ir1->operands[op1] = temp;
211
212 /* Update the type of ir2. The type of ir1 won't have changed --
213 * base types matched, and at least one of the operands of the 2
214 * binops is still a vector if any of them were.
215 */
216 update_type(ir2);
217
218 this->progress = true;
219 }
220
221 /**
222 * Reassociates a constant down a tree of adds or multiplies.
223 *
224 * Consider (2 * (a * (b * 0.5))). We want to send up with a * b.
225 */
226 bool
227 ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
228 ir_constant *constant,
229 ir_expression *ir2)
230 {
231 if (!ir2 || ir1->operation != ir2->operation)
232 return false;
233
234 /* Don't want to even think about matrices. */
235 if (ir1->operands[0]->type->is_matrix() ||
236 ir1->operands[1]->type->is_matrix() ||
237 ir2->operands[0]->type->is_matrix() ||
238 ir2->operands[1]->type->is_matrix())
239 return false;
240
241 ir_constant *ir2_const[2];
242 ir2_const[0] = ir2->operands[0]->constant_expression_value();
243 ir2_const[1] = ir2->operands[1]->constant_expression_value();
244
245 if (ir2_const[0] && ir2_const[1])
246 return false;
247
248 if (ir2_const[0]) {
249 reassociate_operands(ir1, const_index, ir2, 1);
250 return true;
251 } else if (ir2_const[1]) {
252 reassociate_operands(ir1, const_index, ir2, 0);
253 return true;
254 }
255
256 if (reassociate_constant(ir1, const_index, constant,
257 ir2->operands[0]->as_expression())) {
258 update_type(ir2);
259 return true;
260 }
261
262 if (reassociate_constant(ir1, const_index, constant,
263 ir2->operands[1]->as_expression())) {
264 update_type(ir2);
265 return true;
266 }
267
268 return false;
269 }
270
271 /* When eliminating an expression and just returning one of its operands,
272 * we may need to swizzle that operand out to a vector if the expression was
273 * vector type.
274 */
275 ir_rvalue *
276 ir_algebraic_visitor::swizzle_if_required(ir_expression *expr,
277 ir_rvalue *operand)
278 {
279 if (expr->type->is_vector() && operand->type->is_scalar()) {
280 return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0,
281 expr->type->vector_elements);
282 } else
283 return operand;
284 }
285
286 ir_rvalue *
287 ir_algebraic_visitor::handle_expression(ir_expression *ir)
288 {
289 ir_constant *op_const[4] = {NULL, NULL, NULL, NULL};
290 ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL};
291 unsigned int i;
292
293 assert(ir->get_num_operands() <= 4);
294 for (i = 0; i < ir->get_num_operands(); i++) {
295 if (ir->operands[i]->type->is_matrix())
296 return ir;
297
298 op_const[i] = ir->operands[i]->constant_expression_value();
299 op_expr[i] = ir->operands[i]->as_expression();
300 }
301
302 if (this->mem_ctx == NULL)
303 this->mem_ctx = ralloc_parent(ir);
304
305 switch (ir->operation) {
306 case ir_unop_bit_not:
307 if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not)
308 return op_expr[0]->operands[0];
309 break;
310
311 case ir_unop_abs:
312 if (op_expr[0] == NULL)
313 break;
314
315 switch (op_expr[0]->operation) {
316 case ir_unop_abs:
317 case ir_unop_neg:
318 return abs(op_expr[0]->operands[0]);
319 default:
320 break;
321 }
322 break;
323
324 case ir_unop_neg:
325 if (op_expr[0] == NULL)
326 break;
327
328 if (op_expr[0]->operation == ir_unop_neg) {
329 return op_expr[0]->operands[0];
330 }
331 break;
332
333 case ir_unop_exp:
334 if (op_expr[0] == NULL)
335 break;
336
337 if (op_expr[0]->operation == ir_unop_log) {
338 return op_expr[0]->operands[0];
339 }
340 break;
341
342 case ir_unop_log:
343 if (op_expr[0] == NULL)
344 break;
345
346 if (op_expr[0]->operation == ir_unop_exp) {
347 return op_expr[0]->operands[0];
348 }
349 break;
350
351 case ir_unop_exp2:
352 if (op_expr[0] == NULL)
353 break;
354
355 if (op_expr[0]->operation == ir_unop_log2) {
356 return op_expr[0]->operands[0];
357 }
358
359 if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) {
360 for (int log2_pos = 0; log2_pos < 2; log2_pos++) {
361 ir_expression *log2_expr =
362 op_expr[0]->operands[log2_pos]->as_expression();
363
364 if (log2_expr && log2_expr->operation == ir_unop_log2) {
365 return new(mem_ctx) ir_expression(ir_binop_pow,
366 ir->type,
367 log2_expr->operands[0],
368 op_expr[0]->operands[1 - log2_pos]);
369 }
370 }
371 }
372 break;
373
374 case ir_unop_log2:
375 if (op_expr[0] == NULL)
376 break;
377
378 if (op_expr[0]->operation == ir_unop_exp2) {
379 return op_expr[0]->operands[0];
380 }
381 break;
382
383 case ir_unop_f2i:
384 case ir_unop_f2u:
385 if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) {
386 return new(mem_ctx) ir_expression(ir->operation,
387 ir->type,
388 op_expr[0]->operands[0]);
389 }
390 break;
391
392 case ir_unop_logic_not: {
393 enum ir_expression_operation new_op = ir_unop_logic_not;
394
395 if (op_expr[0] == NULL)
396 break;
397
398 switch (op_expr[0]->operation) {
399 case ir_binop_less: new_op = ir_binop_gequal; break;
400 case ir_binop_greater: new_op = ir_binop_lequal; break;
401 case ir_binop_lequal: new_op = ir_binop_greater; break;
402 case ir_binop_gequal: new_op = ir_binop_less; break;
403 case ir_binop_equal: new_op = ir_binop_nequal; break;
404 case ir_binop_nequal: new_op = ir_binop_equal; break;
405 case ir_binop_all_equal: new_op = ir_binop_any_nequal; break;
406 case ir_binop_any_nequal: new_op = ir_binop_all_equal; break;
407
408 default:
409 /* The default case handler is here to silence a warning from GCC.
410 */
411 break;
412 }
413
414 if (new_op != ir_unop_logic_not) {
415 return new(mem_ctx) ir_expression(new_op,
416 ir->type,
417 op_expr[0]->operands[0],
418 op_expr[0]->operands[1]);
419 }
420
421 break;
422 }
423
424 case ir_binop_add:
425 if (is_vec_zero(op_const[0]))
426 return ir->operands[1];
427 if (is_vec_zero(op_const[1]))
428 return ir->operands[0];
429
430 /* Reassociate addition of constants so that we can do constant
431 * folding.
432 */
433 if (op_const[0] && !op_const[1])
434 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
435 if (op_const[1] && !op_const[0])
436 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
437
438 /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
439 if (options->OptimizeForAOS) {
440 ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1],
441 mem_ctx);
442 if (expr)
443 return expr;
444 }
445
446 /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a).
447 *
448 * (-x + y) * a + x
449 * (x * -a) + (y * a) + x
450 * x + (x * -a) + (y * a)
451 * x * (1 - a) + y * a
452 * lrp(x, y, a)
453 */
454 for (int mul_pos = 0; mul_pos < 2; mul_pos++) {
455 ir_expression *mul = op_expr[mul_pos];
456
457 if (!mul || mul->operation != ir_binop_mul)
458 continue;
459
460 /* Multiply found on one of the operands. Now check for an
461 * inner addition operation.
462 */
463 for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) {
464 ir_expression *inner_add =
465 mul->operands[inner_add_pos]->as_expression();
466
467 if (!inner_add || inner_add->operation != ir_binop_add)
468 continue;
469
470 /* Inner addition found on one of the operands. Now check for
471 * one of the operands of the inner addition to be the negative
472 * of x_operand.
473 */
474 for (int neg_pos = 0; neg_pos < 2; neg_pos++) {
475 ir_expression *neg =
476 inner_add->operands[neg_pos]->as_expression();
477
478 if (!neg || neg->operation != ir_unop_neg)
479 continue;
480
481 ir_rvalue *x_operand = ir->operands[1 - mul_pos];
482
483 if (!neg->operands[0]->equals(x_operand))
484 continue;
485
486 ir_rvalue *y_operand = inner_add->operands[1 - neg_pos];
487 ir_rvalue *a_operand = mul->operands[1 - inner_add_pos];
488
489 if (x_operand->type != y_operand->type ||
490 x_operand->type != a_operand->type)
491 continue;
492
493 return lrp(x_operand, y_operand, a_operand);
494 }
495 }
496 }
497
498 break;
499
500 case ir_binop_sub:
501 if (is_vec_zero(op_const[0]))
502 return neg(ir->operands[1]);
503 if (is_vec_zero(op_const[1]))
504 return ir->operands[0];
505 break;
506
507 case ir_binop_mul:
508 if (is_vec_one(op_const[0]))
509 return ir->operands[1];
510 if (is_vec_one(op_const[1]))
511 return ir->operands[0];
512
513 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
514 return ir_constant::zero(ir, ir->type);
515
516 if (is_vec_negative_one(op_const[0]))
517 return neg(ir->operands[1]);
518 if (is_vec_negative_one(op_const[1]))
519 return neg(ir->operands[0]);
520
521 if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f &&
522 op_expr[1] && op_expr[1]->operation == ir_unop_b2f) {
523 return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0]));
524 }
525
526 /* Reassociate multiplication of constants so that we can do
527 * constant folding.
528 */
529 if (op_const[0] && !op_const[1])
530 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
531 if (op_const[1] && !op_const[0])
532 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
533
534 /* Optimizes
535 *
536 * (mul (floor (add (abs x) 0.5) (sign x)))
537 *
538 * into
539 *
540 * (trunc (add x (mul (sign x) 0.5)))
541 */
542 for (int i = 0; i < 2; i++) {
543 ir_expression *sign_expr = ir->operands[i]->as_expression();
544 ir_expression *floor_expr = ir->operands[1 - i]->as_expression();
545
546 if (!sign_expr || sign_expr->operation != ir_unop_sign ||
547 !floor_expr || floor_expr->operation != ir_unop_floor)
548 continue;
549
550 ir_expression *add_expr = floor_expr->operands[0]->as_expression();
551
552 for (int j = 0; j < 2; j++) {
553 ir_expression *abs_expr = add_expr->operands[j]->as_expression();
554 if (!abs_expr || abs_expr->operation != ir_unop_abs)
555 continue;
556
557 ir_constant *point_five = add_expr->operands[1 - j]->as_constant();
558 if (!point_five->is_value(0.5, 0))
559 continue;
560
561 if (abs_expr->operands[0]->equals(sign_expr->operands[0])) {
562 return trunc(add(abs_expr->operands[0],
563 mul(sign_expr, point_five)));
564 }
565 }
566 }
567 break;
568
569 case ir_binop_div:
570 if (is_vec_one(op_const[0]) && (
571 ir->type->base_type == GLSL_TYPE_FLOAT ||
572 ir->type->base_type == GLSL_TYPE_DOUBLE)) {
573 return new(mem_ctx) ir_expression(ir_unop_rcp,
574 ir->operands[1]->type,
575 ir->operands[1],
576 NULL);
577 }
578 if (is_vec_one(op_const[1]))
579 return ir->operands[0];
580 break;
581
582 case ir_binop_dot:
583 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
584 return ir_constant::zero(mem_ctx, ir->type);
585
586 for (int i = 0; i < 2; i++) {
587 if (!op_const[i])
588 continue;
589
590 unsigned components[4] = { 0 }, count = 0;
591
592 for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) {
593 if (op_const[i]->is_zero())
594 continue;
595
596 components[count] = c;
597 count++;
598 }
599
600 /* No channels had zero values; bail. */
601 if (count >= op_const[i]->type->vector_elements)
602 break;
603
604 ir_expression_operation op = count == 1 ?
605 ir_binop_mul : ir_binop_dot;
606
607 /* Swizzle both operands to remove the channels that were zero. */
608 return new(mem_ctx)
609 ir_expression(op, ir->type,
610 new(mem_ctx) ir_swizzle(ir->operands[0],
611 components, count),
612 new(mem_ctx) ir_swizzle(ir->operands[1],
613 components, count));
614 }
615 break;
616
617 case ir_binop_less:
618 case ir_binop_lequal:
619 case ir_binop_greater:
620 case ir_binop_gequal:
621 case ir_binop_equal:
622 case ir_binop_nequal:
623 for (int add_pos = 0; add_pos < 2; add_pos++) {
624 ir_expression *add = op_expr[add_pos];
625
626 if (!add || add->operation != ir_binop_add)
627 continue;
628
629 ir_constant *zero = op_const[1 - add_pos];
630 if (!is_vec_zero(zero))
631 continue;
632
633 /* Depending of the zero position we want to optimize
634 * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y)
635 */
636 if (add_pos == 1) {
637 return new(mem_ctx) ir_expression(ir->operation,
638 neg(add->operands[0]),
639 add->operands[1]);
640 } else {
641 return new(mem_ctx) ir_expression(ir->operation,
642 add->operands[0],
643 neg(add->operands[1]));
644 }
645 }
646 break;
647
648 case ir_binop_all_equal:
649 case ir_binop_any_nequal:
650 if (ir->operands[0]->type->is_scalar() &&
651 ir->operands[1]->type->is_scalar())
652 return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal
653 ? ir_binop_equal : ir_binop_nequal,
654 ir->operands[0],
655 ir->operands[1]);
656 break;
657
658 case ir_binop_rshift:
659 case ir_binop_lshift:
660 /* 0 >> x == 0 */
661 if (is_vec_zero(op_const[0]))
662 return ir->operands[0];
663 /* x >> 0 == x */
664 if (is_vec_zero(op_const[1]))
665 return ir->operands[0];
666 break;
667
668 case ir_binop_logic_and:
669 if (is_vec_one(op_const[0])) {
670 return ir->operands[1];
671 } else if (is_vec_one(op_const[1])) {
672 return ir->operands[0];
673 } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
674 return ir_constant::zero(mem_ctx, ir->type);
675 } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
676 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
677 /* De Morgan's Law:
678 * (not A) and (not B) === not (A or B)
679 */
680 return logic_not(logic_or(op_expr[0]->operands[0],
681 op_expr[1]->operands[0]));
682 } else if (ir->operands[0]->equals(ir->operands[1])) {
683 /* (a && a) == a */
684 return ir->operands[0];
685 }
686 break;
687
688 case ir_binop_logic_xor:
689 if (is_vec_zero(op_const[0])) {
690 return ir->operands[1];
691 } else if (is_vec_zero(op_const[1])) {
692 return ir->operands[0];
693 } else if (is_vec_one(op_const[0])) {
694 return logic_not(ir->operands[1]);
695 } else if (is_vec_one(op_const[1])) {
696 return logic_not(ir->operands[0]);
697 } else if (ir->operands[0]->equals(ir->operands[1])) {
698 /* (a ^^ a) == false */
699 return ir_constant::zero(mem_ctx, ir->type);
700 }
701 break;
702
703 case ir_binop_logic_or:
704 if (is_vec_zero(op_const[0])) {
705 return ir->operands[1];
706 } else if (is_vec_zero(op_const[1])) {
707 return ir->operands[0];
708 } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) {
709 ir_constant_data data;
710
711 for (unsigned i = 0; i < 16; i++)
712 data.b[i] = true;
713
714 return new(mem_ctx) ir_constant(ir->type, &data);
715 } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
716 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
717 /* De Morgan's Law:
718 * (not A) or (not B) === not (A and B)
719 */
720 return logic_not(logic_and(op_expr[0]->operands[0],
721 op_expr[1]->operands[0]));
722 } else if (ir->operands[0]->equals(ir->operands[1])) {
723 /* (a || a) == a */
724 return ir->operands[0];
725 }
726 break;
727
728 case ir_binop_pow:
729 /* 1^x == 1 */
730 if (is_vec_one(op_const[0]))
731 return op_const[0];
732
733 /* x^1 == x */
734 if (is_vec_one(op_const[1]))
735 return ir->operands[0];
736
737 /* pow(2,x) == exp2(x) */
738 if (is_vec_two(op_const[0]))
739 return expr(ir_unop_exp2, ir->operands[1]);
740
741 if (is_vec_two(op_const[1])) {
742 ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
743 ir_var_temporary);
744 base_ir->insert_before(x);
745 base_ir->insert_before(assign(x, ir->operands[0]));
746 return mul(x, x);
747 }
748
749 break;
750
751 case ir_binop_min:
752 case ir_binop_max:
753 if (ir->type->base_type != GLSL_TYPE_FLOAT || options->EmitNoSat)
754 break;
755
756 /* Replace min(max) operations and its commutative combinations with
757 * a saturate operation
758 */
759 for (int op = 0; op < 2; op++) {
760 ir_expression *inner_expr = op_expr[op];
761 ir_constant *outer_const = op_const[1 - op];
762 ir_expression_operation op_cond = (ir->operation == ir_binop_max) ?
763 ir_binop_min : ir_binop_max;
764
765 if (!inner_expr || !outer_const || (inner_expr->operation != op_cond))
766 continue;
767
768 /* One of these has to be a constant */
769 if (!inner_expr->operands[0]->as_constant() &&
770 !inner_expr->operands[1]->as_constant())
771 break;
772
773 /* Found a min(max) combination. Now try to see if its operands
774 * meet our conditions that we can do just a single saturate operation
775 */
776 for (int minmax_op = 0; minmax_op < 2; minmax_op++) {
777 ir_rvalue *x = inner_expr->operands[minmax_op];
778 ir_rvalue *y = inner_expr->operands[1 - minmax_op];
779
780 ir_constant *inner_const = y->as_constant();
781 if (!inner_const)
782 continue;
783
784 /* min(max(x, 0.0), 1.0) is sat(x) */
785 if (ir->operation == ir_binop_min &&
786 inner_const->is_zero() &&
787 outer_const->is_one())
788 return saturate(x);
789
790 /* max(min(x, 1.0), 0.0) is sat(x) */
791 if (ir->operation == ir_binop_max &&
792 inner_const->is_one() &&
793 outer_const->is_zero())
794 return saturate(x);
795
796 /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */
797 if (ir->operation == ir_binop_min &&
798 inner_const->is_zero() &&
799 is_less_than_one(outer_const))
800 return saturate(expr(ir_binop_min, x, outer_const));
801
802 /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */
803 if (ir->operation == ir_binop_max &&
804 is_less_than_one(inner_const) &&
805 outer_const->is_zero())
806 return saturate(expr(ir_binop_min, x, inner_const));
807
808 /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */
809 if (ir->operation == ir_binop_max &&
810 inner_const->is_one() &&
811 is_greater_than_zero(outer_const))
812 return saturate(expr(ir_binop_max, x, outer_const));
813
814 /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */
815 if (ir->operation == ir_binop_min &&
816 is_greater_than_zero(inner_const) &&
817 outer_const->is_one())
818 return saturate(expr(ir_binop_max, x, inner_const));
819 }
820 }
821
822 break;
823
824 case ir_unop_rcp:
825 if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp)
826 return op_expr[0]->operands[0];
827
828 if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 ||
829 op_expr[0]->operation == ir_unop_exp)) {
830 return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type,
831 neg(op_expr[0]->operands[0]));
832 }
833
834 /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at
835 * its IR level, so we can always apply this transformation.
836 */
837 if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq)
838 return sqrt(op_expr[0]->operands[0]);
839
840 /* As far as we know, all backends are OK with rsq. */
841 if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) {
842 return rsq(op_expr[0]->operands[0]);
843 }
844
845 break;
846
847 case ir_triop_fma:
848 /* Operands are op0 * op1 + op2. */
849 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
850 return ir->operands[2];
851 } else if (is_vec_zero(op_const[2])) {
852 return mul(ir->operands[0], ir->operands[1]);
853 } else if (is_vec_one(op_const[0])) {
854 return add(ir->operands[1], ir->operands[2]);
855 } else if (is_vec_one(op_const[1])) {
856 return add(ir->operands[0], ir->operands[2]);
857 }
858 break;
859
860 case ir_triop_lrp:
861 /* Operands are (x, y, a). */
862 if (is_vec_zero(op_const[2])) {
863 return ir->operands[0];
864 } else if (is_vec_one(op_const[2])) {
865 return ir->operands[1];
866 } else if (ir->operands[0]->equals(ir->operands[1])) {
867 return ir->operands[0];
868 } else if (is_vec_zero(op_const[0])) {
869 return mul(ir->operands[1], ir->operands[2]);
870 } else if (is_vec_zero(op_const[1])) {
871 unsigned op2_components = ir->operands[2]->type->vector_elements;
872 ir_constant *one;
873
874 switch (ir->type->base_type) {
875 case GLSL_TYPE_FLOAT:
876 one = new(mem_ctx) ir_constant(1.0f, op2_components);
877 break;
878 case GLSL_TYPE_DOUBLE:
879 one = new(mem_ctx) ir_constant(1.0, op2_components);
880 break;
881 default:
882 one = NULL;
883 unreachable("unexpected type");
884 }
885
886 return mul(ir->operands[0], add(one, neg(ir->operands[2])));
887 }
888 break;
889
890 case ir_triop_csel:
891 if (is_vec_one(op_const[0]))
892 return ir->operands[1];
893 if (is_vec_zero(op_const[0]))
894 return ir->operands[2];
895 break;
896
897 default:
898 break;
899 }
900
901 return ir;
902 }
903
904 void
905 ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
906 {
907 if (!*rvalue)
908 return;
909
910 ir_expression *expr = (*rvalue)->as_expression();
911 if (!expr || expr->operation == ir_quadop_vector)
912 return;
913
914 ir_rvalue *new_rvalue = handle_expression(expr);
915 if (new_rvalue == *rvalue)
916 return;
917
918 /* If the expr used to be some vec OP scalar returning a vector, and the
919 * optimization gave us back a scalar, we still need to turn it into a
920 * vector.
921 */
922 *rvalue = swizzle_if_required(expr, new_rvalue);
923
924 this->progress = true;
925 }
926
927 bool
928 do_algebraic(exec_list *instructions, bool native_integers,
929 const struct gl_shader_compiler_options *options)
930 {
931 ir_algebraic_visitor v(native_integers, options);
932
933 visit_list_elements(&v, instructions);
934
935 return v.progress;
936 }