nir: move to compiler/
[mesa.git] / src / glsl / opt_algebraic.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file opt_algebraic.cpp
26 *
27 * Takes advantage of associativity, commutativity, and other algebraic
28 * properties to simplify expressions.
29 */
30
31 #include "ir.h"
32 #include "ir_visitor.h"
33 #include "ir_rvalue_visitor.h"
34 #include "ir_optimization.h"
35 #include "ir_builder.h"
36 #include "compiler/glsl_types.h"
37
38 using namespace ir_builder;
39
40 namespace {
41
42 /**
43 * Visitor class for simplifying expression trees using algebraic identities.
44 */
45
46 class ir_algebraic_visitor : public ir_rvalue_visitor {
47 public:
48 ir_algebraic_visitor(bool native_integers,
49 const struct gl_shader_compiler_options *options)
50 : options(options)
51 {
52 this->progress = false;
53 this->mem_ctx = NULL;
54 this->native_integers = native_integers;
55 }
56
57 virtual ~ir_algebraic_visitor()
58 {
59 }
60
61 ir_rvalue *handle_expression(ir_expression *ir);
62 void handle_rvalue(ir_rvalue **rvalue);
63 bool reassociate_constant(ir_expression *ir1,
64 int const_index,
65 ir_constant *constant,
66 ir_expression *ir2);
67 void reassociate_operands(ir_expression *ir1,
68 int op1,
69 ir_expression *ir2,
70 int op2);
71 ir_rvalue *swizzle_if_required(ir_expression *expr,
72 ir_rvalue *operand);
73
74 const struct gl_shader_compiler_options *options;
75 void *mem_ctx;
76
77 bool native_integers;
78 bool progress;
79 };
80
81 } /* unnamed namespace */
82
83 static inline bool
84 is_vec_zero(ir_constant *ir)
85 {
86 return (ir == NULL) ? false : ir->is_zero();
87 }
88
89 static inline bool
90 is_vec_one(ir_constant *ir)
91 {
92 return (ir == NULL) ? false : ir->is_one();
93 }
94
95 static inline bool
96 is_vec_two(ir_constant *ir)
97 {
98 return (ir == NULL) ? false : ir->is_value(2.0, 2);
99 }
100
101 static inline bool
102 is_vec_four(ir_constant *ir)
103 {
104 return (ir == NULL) ? false : ir->is_value(4.0, 4);
105 }
106
107 static inline bool
108 is_vec_negative_one(ir_constant *ir)
109 {
110 return (ir == NULL) ? false : ir->is_negative_one();
111 }
112
113 static inline bool
114 is_valid_vec_const(ir_constant *ir)
115 {
116 if (ir == NULL)
117 return false;
118
119 if (!ir->type->is_scalar() && !ir->type->is_vector())
120 return false;
121
122 return true;
123 }
124
125 static inline bool
126 is_less_than_one(ir_constant *ir)
127 {
128 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
129
130 if (!is_valid_vec_const(ir))
131 return false;
132
133 unsigned component = 0;
134 for (int c = 0; c < ir->type->vector_elements; c++) {
135 if (ir->get_float_component(c) < 1.0f)
136 component++;
137 }
138
139 return (component == ir->type->vector_elements);
140 }
141
142 static inline bool
143 is_greater_than_zero(ir_constant *ir)
144 {
145 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
146
147 if (!is_valid_vec_const(ir))
148 return false;
149
150 unsigned component = 0;
151 for (int c = 0; c < ir->type->vector_elements; c++) {
152 if (ir->get_float_component(c) > 0.0f)
153 component++;
154 }
155
156 return (component == ir->type->vector_elements);
157 }
158
159 static void
160 update_type(ir_expression *ir)
161 {
162 if (ir->operands[0]->type->is_vector())
163 ir->type = ir->operands[0]->type;
164 else
165 ir->type = ir->operands[1]->type;
166 }
167
168 /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
169 static ir_expression *
170 try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
171 {
172 if (expr0 && expr0->operation == ir_binop_add &&
173 expr0->type->is_float() &&
174 expr1 && expr1->operation == ir_binop_add &&
175 expr1->type->is_float()) {
176 ir_swizzle *x = expr0->operands[0]->as_swizzle();
177 ir_swizzle *y = expr0->operands[1]->as_swizzle();
178 ir_swizzle *z = expr1->operands[0]->as_swizzle();
179 ir_swizzle *w = expr1->operands[1]->as_swizzle();
180
181 if (!x || x->mask.num_components != 1 ||
182 !y || y->mask.num_components != 1 ||
183 !z || z->mask.num_components != 1 ||
184 !w || w->mask.num_components != 1) {
185 return NULL;
186 }
187
188 bool swiz_seen[4] = {false, false, false, false};
189 swiz_seen[x->mask.x] = true;
190 swiz_seen[y->mask.x] = true;
191 swiz_seen[z->mask.x] = true;
192 swiz_seen[w->mask.x] = true;
193
194 if (!swiz_seen[0] || !swiz_seen[1] ||
195 !swiz_seen[2] || !swiz_seen[3]) {
196 return NULL;
197 }
198
199 if (x->val->equals(y->val) &&
200 x->val->equals(z->val) &&
201 x->val->equals(w->val)) {
202 return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4));
203 }
204 }
205 return NULL;
206 }
207
208 void
209 ir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
210 int op1,
211 ir_expression *ir2,
212 int op2)
213 {
214 ir_rvalue *temp = ir2->operands[op2];
215 ir2->operands[op2] = ir1->operands[op1];
216 ir1->operands[op1] = temp;
217
218 /* Update the type of ir2. The type of ir1 won't have changed --
219 * base types matched, and at least one of the operands of the 2
220 * binops is still a vector if any of them were.
221 */
222 update_type(ir2);
223
224 this->progress = true;
225 }
226
227 /**
228 * Reassociates a constant down a tree of adds or multiplies.
229 *
230 * Consider (2 * (a * (b * 0.5))). We want to send up with a * b.
231 */
232 bool
233 ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
234 ir_constant *constant,
235 ir_expression *ir2)
236 {
237 if (!ir2 || ir1->operation != ir2->operation)
238 return false;
239
240 /* Don't want to even think about matrices. */
241 if (ir1->operands[0]->type->is_matrix() ||
242 ir1->operands[1]->type->is_matrix() ||
243 ir2->operands[0]->type->is_matrix() ||
244 ir2->operands[1]->type->is_matrix())
245 return false;
246
247 ir_constant *ir2_const[2];
248 ir2_const[0] = ir2->operands[0]->constant_expression_value();
249 ir2_const[1] = ir2->operands[1]->constant_expression_value();
250
251 if (ir2_const[0] && ir2_const[1])
252 return false;
253
254 if (ir2_const[0]) {
255 reassociate_operands(ir1, const_index, ir2, 1);
256 return true;
257 } else if (ir2_const[1]) {
258 reassociate_operands(ir1, const_index, ir2, 0);
259 return true;
260 }
261
262 if (reassociate_constant(ir1, const_index, constant,
263 ir2->operands[0]->as_expression())) {
264 update_type(ir2);
265 return true;
266 }
267
268 if (reassociate_constant(ir1, const_index, constant,
269 ir2->operands[1]->as_expression())) {
270 update_type(ir2);
271 return true;
272 }
273
274 return false;
275 }
276
277 /* When eliminating an expression and just returning one of its operands,
278 * we may need to swizzle that operand out to a vector if the expression was
279 * vector type.
280 */
281 ir_rvalue *
282 ir_algebraic_visitor::swizzle_if_required(ir_expression *expr,
283 ir_rvalue *operand)
284 {
285 if (expr->type->is_vector() && operand->type->is_scalar()) {
286 return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0,
287 expr->type->vector_elements);
288 } else
289 return operand;
290 }
291
/**
 * Try to algebraically simplify \p ir.
 *
 * Returns a replacement rvalue (either a newly built node or one of ir's own
 * operand subtrees) when a rewrite applies, or \p ir itself unchanged when
 * none does.
 */
ir_rvalue *
ir_algebraic_visitor::handle_expression(ir_expression *ir)
{
   ir_constant *op_const[4] = {NULL, NULL, NULL, NULL};
   ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL};
   unsigned int i;

   /* Reassociate (M1 * M2) * v as M1 * (M2 * v): two mat*vec products are
    * cheaper than one mat*mat product followed by mat*vec.
    */
   if (ir->operation == ir_binop_mul &&
       ir->operands[0]->type->is_matrix() &&
       ir->operands[1]->type->is_vector()) {
      ir_expression *matrix_mul = ir->operands[0]->as_expression();

      if (matrix_mul && matrix_mul->operation == ir_binop_mul &&
          matrix_mul->operands[0]->type->is_matrix() &&
          matrix_mul->operands[1]->type->is_matrix()) {

         return mul(matrix_mul->operands[0],
                    mul(matrix_mul->operands[1], ir->operands[1]));
      }
   }

   assert(ir->get_num_operands() <= 4);
   for (i = 0; i < ir->get_num_operands(); i++) {
      /* Other than the mat*mat*vec reassociation above, none of the rewrites
       * below handle matrix operands.
       */
      if (ir->operands[i]->type->is_matrix())
         return ir;

      op_const[i] = ir->operands[i]->constant_expression_value();
      op_expr[i] = ir->operands[i]->as_expression();
   }

   /* Lazily capture the ralloc context that replacement nodes belong to. */
   if (this->mem_ctx == NULL)
      this->mem_ctx = ralloc_parent(ir);

   switch (ir->operation) {
   case ir_unop_bit_not:
      /* ~~x == x */
      if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not)
         return op_expr[0]->operands[0];
      break;

   case ir_unop_abs:
      if (op_expr[0] == NULL)
         break;

      switch (op_expr[0]->operation) {
      case ir_unop_abs:
      case ir_unop_neg:
         /* abs(abs(x)) == abs(x) and abs(-x) == abs(x) */
         return abs(op_expr[0]->operands[0]);
      default:
         break;
      }
      break;

   case ir_unop_neg:
      if (op_expr[0] == NULL)
         break;

      /* -(-x) == x */
      if (op_expr[0]->operation == ir_unop_neg) {
         return op_expr[0]->operands[0];
      }
      break;

   case ir_unop_exp:
      if (op_expr[0] == NULL)
         break;

      /* e^log(x) == x */
      if (op_expr[0]->operation == ir_unop_log) {
         return op_expr[0]->operands[0];
      }
      break;

   case ir_unop_log:
      if (op_expr[0] == NULL)
         break;

      /* log(e^x) == x */
      if (op_expr[0]->operation == ir_unop_exp) {
         return op_expr[0]->operands[0];
      }
      break;

   case ir_unop_exp2:
      if (op_expr[0] == NULL)
         break;

      /* 2^log2(x) == x */
      if (op_expr[0]->operation == ir_unop_log2) {
         return op_expr[0]->operands[0];
      }

      /* 2^(y * log2(x)) == x^y, when the backend supports pow. */
      if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) {
         for (int log2_pos = 0; log2_pos < 2; log2_pos++) {
            ir_expression *log2_expr =
               op_expr[0]->operands[log2_pos]->as_expression();

            if (log2_expr && log2_expr->operation == ir_unop_log2) {
               return new(mem_ctx) ir_expression(ir_binop_pow,
                                                 ir->type,
                                                 log2_expr->operands[0],
                                                 op_expr[0]->operands[1 - log2_pos]);
            }
         }
      }
      break;

   case ir_unop_log2:
      if (op_expr[0] == NULL)
         break;

      /* log2(2^x) == x */
      if (op_expr[0]->operation == ir_unop_exp2) {
         return op_expr[0]->operands[0];
      }
      break;

   case ir_unop_f2i:
   case ir_unop_f2u:
      /* Float-to-integer conversion truncates anyway, so an inner trunc()
       * is redundant.
       */
      if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) {
         return new(mem_ctx) ir_expression(ir->operation,
                                           ir->type,
                                           op_expr[0]->operands[0]);
      }
      break;

   case ir_unop_logic_not: {
      enum ir_expression_operation new_op = ir_unop_logic_not;

      if (op_expr[0] == NULL)
         break;

      /* Fold the logical-not into a comparison by inverting the comparison
       * operation: !(a < b) == (a >= b), and so on.
       */
      switch (op_expr[0]->operation) {
      case ir_binop_less:       new_op = ir_binop_gequal;     break;
      case ir_binop_greater:    new_op = ir_binop_lequal;     break;
      case ir_binop_lequal:     new_op = ir_binop_greater;    break;
      case ir_binop_gequal:     new_op = ir_binop_less;       break;
      case ir_binop_equal:      new_op = ir_binop_nequal;     break;
      case ir_binop_nequal:     new_op = ir_binop_equal;      break;
      case ir_binop_all_equal:  new_op = ir_binop_any_nequal; break;
      case ir_binop_any_nequal: new_op = ir_binop_all_equal;  break;

      default:
         /* The default case handler is here to silence a warning from GCC.
          */
         break;
      }

      /* new_op still being ir_unop_logic_not means the inner expression was
       * not a comparison we know how to invert.
       */
      if (new_op != ir_unop_logic_not) {
         return new(mem_ctx) ir_expression(new_op,
                                           ir->type,
                                           op_expr[0]->operands[0],
                                           op_expr[0]->operands[1]);
      }

      break;
   }

   case ir_unop_saturate:
      /* sat(b2f(a) + b2f(b)) == b2f(a || b): the sum is 0, 1 or 2 and
       * saturating clamps 2 back to 1.
       */
      if (op_expr[0] && op_expr[0]->operation == ir_binop_add) {
         ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression();
         ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression();

         if (b2f_0 && b2f_0->operation == ir_unop_b2f &&
             b2f_1 && b2f_1->operation == ir_unop_b2f) {
            return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0]));
         }
      }
      break;

   case ir_binop_add:
      /* 0 + x == x and x + 0 == x */
      if (is_vec_zero(op_const[0]))
         return ir->operands[1];
      if (is_vec_zero(op_const[1]))
         return ir->operands[0];

      /* Reassociate addition of constants so that we can do constant
       * folding.
       */
      if (op_const[0] && !op_const[1])
         reassociate_constant(ir, 0, op_const[0], op_expr[1]);
      if (op_const[1] && !op_const[0])
         reassociate_constant(ir, 1, op_const[1], op_expr[0]);

      /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
      if (options->OptimizeForAOS) {
         ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1],
                                                    mem_ctx);
         if (expr)
            return expr;
      }

      /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a).
       *
       *  (-x + y) * a + x
       *  (x * -a) + (y * a) + x
       *  x + (x * -a) + (y * a)
       *  x * (1 - a) + y * a
       *  lrp(x, y, a)
       */
      /* NOTE: the local 'mul' below intentionally shadows ir_builder::mul()
       * for the duration of this loop.
       */
      for (int mul_pos = 0; mul_pos < 2; mul_pos++) {
         ir_expression *mul = op_expr[mul_pos];

         if (!mul || mul->operation != ir_binop_mul)
            continue;

         /* Multiply found on one of the operands. Now check for an
          * inner addition operation.
          */
         for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) {
            ir_expression *inner_add =
               mul->operands[inner_add_pos]->as_expression();

            if (!inner_add || inner_add->operation != ir_binop_add)
               continue;

            /* Inner addition found on one of the operands. Now check for
             * one of the operands of the inner addition to be the negative
             * of x_operand.
             */
            for (int neg_pos = 0; neg_pos < 2; neg_pos++) {
               ir_expression *neg =
                  inner_add->operands[neg_pos]->as_expression();

               if (!neg || neg->operation != ir_unop_neg)
                  continue;

               ir_rvalue *x_operand = ir->operands[1 - mul_pos];

               if (!neg->operands[0]->equals(x_operand))
                  continue;

               ir_rvalue *y_operand = inner_add->operands[1 - neg_pos];
               ir_rvalue *a_operand = mul->operands[1 - inner_add_pos];

               /* lrp requires all three operands to share one type. */
               if (x_operand->type != y_operand->type ||
                   x_operand->type != a_operand->type)
                  continue;

               return lrp(x_operand, y_operand, a_operand);
            }
         }
      }

      break;

   case ir_binop_sub:
      /* 0 - x == -x and x - 0 == x */
      if (is_vec_zero(op_const[0]))
         return neg(ir->operands[1]);
      if (is_vec_zero(op_const[1]))
         return ir->operands[0];
      break;

   case ir_binop_mul:
      /* 1 * x == x and x * 1 == x */
      if (is_vec_one(op_const[0]))
         return ir->operands[1];
      if (is_vec_one(op_const[1]))
         return ir->operands[0];

      /* 0 * x == 0 and x * 0 == 0.
       *
       * NOTE(review): this zero() call uses \c ir rather than \c mem_ctx as
       * the ralloc context, unlike the other zero() calls in this function —
       * presumably equivalent since ir lives under the same hierarchy, but
       * the inconsistency is worth confirming.
       */
      if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
         return ir_constant::zero(ir, ir->type);

      /* -1 * x == -x and x * -1 == -x */
      if (is_vec_negative_one(op_const[0]))
         return neg(ir->operands[1]);
      if (is_vec_negative_one(op_const[1]))
         return neg(ir->operands[0]);

      /* b2f(a) * b2f(b) == b2f(a && b): the product is 1 only when both are. */
      if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f &&
          op_expr[1] && op_expr[1]->operation == ir_unop_b2f) {
         return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0]));
      }

      /* Reassociate multiplication of constants so that we can do
       * constant folding.
       */
      if (op_const[0] && !op_const[1])
         reassociate_constant(ir, 0, op_const[0], op_expr[1]);
      if (op_const[1] && !op_const[0])
         reassociate_constant(ir, 1, op_const[1], op_expr[0]);

      /* Optimizes
       *
       *    (mul (floor (add (abs x) 0.5) (sign x)))
       *
       * into
       *
       *    (trunc (add x (mul (sign x) 0.5)))
       */
      /* (This is round-to-nearest expressed via floor/abs/sign, rewritten
       * into the cheaper trunc form; 'i' here shadows the function-level i.)
       */
      for (int i = 0; i < 2; i++) {
         ir_expression *sign_expr = ir->operands[i]->as_expression();
         ir_expression *floor_expr = ir->operands[1 - i]->as_expression();

         if (!sign_expr || sign_expr->operation != ir_unop_sign ||
             !floor_expr || floor_expr->operation != ir_unop_floor)
            continue;

         ir_expression *add_expr = floor_expr->operands[0]->as_expression();
         if (!add_expr || add_expr->operation != ir_binop_add)
            continue;

         for (int j = 0; j < 2; j++) {
            ir_expression *abs_expr = add_expr->operands[j]->as_expression();
            if (!abs_expr || abs_expr->operation != ir_unop_abs)
               continue;

            ir_constant *point_five = add_expr->operands[1 - j]->as_constant();
            if (!point_five || !point_five->is_value(0.5, 0))
               continue;

            /* Both abs() and sign() must be applied to the same value. */
            if (abs_expr->operands[0]->equals(sign_expr->operands[0])) {
               return trunc(add(abs_expr->operands[0],
                                mul(sign_expr, point_five)));
            }
         }
      }
      break;

   case ir_binop_div:
      /* 1 / x == rcp(x) for floating-point types. */
      if (is_vec_one(op_const[0]) && (
                ir->type->base_type == GLSL_TYPE_FLOAT ||
                ir->type->base_type == GLSL_TYPE_DOUBLE)) {
         return new(mem_ctx) ir_expression(ir_unop_rcp,
                                           ir->operands[1]->type,
                                           ir->operands[1],
                                           NULL);
      }
      /* x / 1 == x */
      if (is_vec_one(op_const[1]))
         return ir->operands[0];
      break;

   case ir_binop_dot:
      /* dot(0, x) == 0 and dot(x, 0) == 0 */
      if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
         return ir_constant::zero(mem_ctx, ir->type);

      /* Drop channels in which one operand is a constant zero — those
       * channels contribute nothing to the sum — and shrink the dot (or
       * reduce it to a multiply when only one channel survives).
       *
       * NOTE(review): the inner test calls ir_constant::is_zero(), which
       * checks the WHOLE vector rather than component c.  Since the all-zero
       * case already returned above, is_zero() is always false here, count
       * always reaches vector_elements, and this rewrite never fires.  A
       * per-component zero test was probably intended — confirm against
       * upstream before relying on this optimization.
       */
      for (int i = 0; i < 2; i++) {
         if (!op_const[i])
            continue;

         unsigned components[4] = { 0 }, count = 0;

         for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) {
            if (op_const[i]->is_zero())
               continue;

            components[count] = c;
            count++;
         }

         /* No channels had zero values; bail. */
         if (count >= op_const[i]->type->vector_elements)
            break;

         ir_expression_operation op = count == 1 ?
            ir_binop_mul : ir_binop_dot;

         /* Swizzle both operands to remove the channels that were zero. */
         return new(mem_ctx)
            ir_expression(op, ir->type,
                          new(mem_ctx) ir_swizzle(ir->operands[0],
                                                  components, count),
                          new(mem_ctx) ir_swizzle(ir->operands[1],
                                                  components, count));
      }
      break;

   case ir_binop_less:
   case ir_binop_lequal:
   case ir_binop_greater:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal:
      for (int add_pos = 0; add_pos < 2; add_pos++) {
         ir_expression *add = op_expr[add_pos];

         if (!add || add->operation != ir_binop_add)
            continue;

         ir_constant *zero = op_const[1 - add_pos];
         if (!is_vec_zero(zero))
            continue;

         /* Depending of the zero position we want to optimize
          * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y)
          */
         /* (The 3-operand ir_expression constructor infers the result type
          * from the operands.)
          */
         if (add_pos == 1) {
            return new(mem_ctx) ir_expression(ir->operation,
                                              neg(add->operands[0]),
                                              add->operands[1]);
         } else {
            return new(mem_ctx) ir_expression(ir->operation,
                                              add->operands[0],
                                              neg(add->operands[1]));
         }
      }
      break;

   case ir_binop_all_equal:
   case ir_binop_any_nequal:
      /* On scalars the aggregate comparisons degenerate to the plain
       * component-wise comparisons.
       */
      if (ir->operands[0]->type->is_scalar() &&
          ir->operands[1]->type->is_scalar())
         return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal
                                           ? ir_binop_equal : ir_binop_nequal,
                                           ir->operands[0],
                                           ir->operands[1]);
      break;

   case ir_binop_rshift:
   case ir_binop_lshift:
      /* 0 >> x == 0 */
      if (is_vec_zero(op_const[0]))
         return ir->operands[0];
      /* x >> 0 == x */
      if (is_vec_zero(op_const[1]))
         return ir->operands[0];
      break;

   case ir_binop_logic_and:
      if (is_vec_one(op_const[0])) {
         /* true && x == x */
         return ir->operands[1];
      } else if (is_vec_one(op_const[1])) {
         /* x && true == x */
         return ir->operands[0];
      } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
         /* false && x == false (and commuted) */
         return ir_constant::zero(mem_ctx, ir->type);
      } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
                 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
         /* De Morgan's Law:
          *    (not A) and (not B) === not (A or B)
          */
         return logic_not(logic_or(op_expr[0]->operands[0],
                                   op_expr[1]->operands[0]));
      } else if (ir->operands[0]->equals(ir->operands[1])) {
         /* (a && a) == a */
         return ir->operands[0];
      }
      break;

   case ir_binop_logic_xor:
      if (is_vec_zero(op_const[0])) {
         /* false ^^ x == x */
         return ir->operands[1];
      } else if (is_vec_zero(op_const[1])) {
         /* x ^^ false == x */
         return ir->operands[0];
      } else if (is_vec_one(op_const[0])) {
         /* true ^^ x == !x */
         return logic_not(ir->operands[1]);
      } else if (is_vec_one(op_const[1])) {
         /* x ^^ true == !x */
         return logic_not(ir->operands[0]);
      } else if (ir->operands[0]->equals(ir->operands[1])) {
         /* (a ^^ a) == false */
         return ir_constant::zero(mem_ctx, ir->type);
      }
      break;

   case ir_binop_logic_or:
      if (is_vec_zero(op_const[0])) {
         /* false || x == x */
         return ir->operands[1];
      } else if (is_vec_zero(op_const[1])) {
         /* x || false == x */
         return ir->operands[0];
      } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) {
         /* true || x == true (and commuted): build an all-true constant. */
         ir_constant_data data;

         for (unsigned i = 0; i < 16; i++)
            data.b[i] = true;

         return new(mem_ctx) ir_constant(ir->type, &data);
      } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
                 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
         /* De Morgan's Law:
          *    (not A) or (not B) === not (A and B)
          */
         return logic_not(logic_and(op_expr[0]->operands[0],
                                    op_expr[1]->operands[0]));
      } else if (ir->operands[0]->equals(ir->operands[1])) {
         /* (a || a) == a */
         return ir->operands[0];
      }
      break;

   case ir_binop_pow:
      /* 1^x == 1 */
      if (is_vec_one(op_const[0]))
         return op_const[0];

      /* x^1 == x */
      if (is_vec_one(op_const[1]))
         return ir->operands[0];

      /* pow(2,x) == exp2(x) */
      if (is_vec_two(op_const[0]))
         return expr(ir_unop_exp2, ir->operands[1]);

      /* x^2 == x*x: materialize x in a temporary so it is evaluated once,
       * inserting the declaration and assignment before the current
       * instruction (base_ir).
       */
      if (is_vec_two(op_const[1])) {
         ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
                                              ir_var_temporary);
         base_ir->insert_before(x);
         base_ir->insert_before(assign(x, ir->operands[0]));
         return mul(x, x);
      }

      /* x^4 == (x*x)*(x*x), via two temporaries. */
      if (is_vec_four(op_const[1])) {
         ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
                                              ir_var_temporary);
         base_ir->insert_before(x);
         base_ir->insert_before(assign(x, ir->operands[0]));

         ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type,
                                                    "squared",
                                                    ir_var_temporary);
         base_ir->insert_before(squared);
         base_ir->insert_before(assign(squared, mul(x, x)));
         return mul(squared, squared);
      }

      break;

   case ir_binop_min:
   case ir_binop_max:
      if (ir->type->base_type != GLSL_TYPE_FLOAT || options->EmitNoSat)
         break;

      /* Replace min(max) operations and its commutative combinations with
       * a saturate operation
       */
      for (int op = 0; op < 2; op++) {
         ir_expression *inner_expr = op_expr[op];
         ir_constant *outer_const = op_const[1 - op];
         /* The inner expression must be the opposite min/max of the outer. */
         ir_expression_operation op_cond = (ir->operation == ir_binop_max) ?
            ir_binop_min : ir_binop_max;

         if (!inner_expr || !outer_const || (inner_expr->operation != op_cond))
            continue;

         /* One of these has to be a constant */
         if (!inner_expr->operands[0]->as_constant() &&
             !inner_expr->operands[1]->as_constant())
            break;

         /* Found a min(max) combination. Now try to see if its operands
          * meet our conditions that we can do just a single saturate operation
          */
         for (int minmax_op = 0; minmax_op < 2; minmax_op++) {
            ir_rvalue *x = inner_expr->operands[minmax_op];
            ir_rvalue *y = inner_expr->operands[1 - minmax_op];

            ir_constant *inner_const = y->as_constant();
            if (!inner_const)
               continue;

            /* min(max(x, 0.0), 1.0) is sat(x) */
            if (ir->operation == ir_binop_min &&
                inner_const->is_zero() &&
                outer_const->is_one())
               return saturate(x);

            /* max(min(x, 1.0), 0.0) is sat(x) */
            if (ir->operation == ir_binop_max &&
                inner_const->is_one() &&
                outer_const->is_zero())
               return saturate(x);

            /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */
            if (ir->operation == ir_binop_min &&
                inner_const->is_zero() &&
                is_less_than_one(outer_const))
               return saturate(expr(ir_binop_min, x, outer_const));

            /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */
            if (ir->operation == ir_binop_max &&
                is_less_than_one(inner_const) &&
                outer_const->is_zero())
               return saturate(expr(ir_binop_min, x, inner_const));

            /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */
            if (ir->operation == ir_binop_max &&
                inner_const->is_one() &&
                is_greater_than_zero(outer_const))
               return saturate(expr(ir_binop_max, x, outer_const));

            /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */
            if (ir->operation == ir_binop_min &&
                is_greater_than_zero(inner_const) &&
                outer_const->is_one())
               return saturate(expr(ir_binop_max, x, inner_const));
         }
      }

      break;

   case ir_unop_rcp:
      /* rcp(rcp(x)) == x */
      if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp)
         return op_expr[0]->operands[0];

      /* rcp(exp2(x)) == exp2(-x), and likewise for exp. */
      if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 ||
                         op_expr[0]->operation == ir_unop_exp)) {
         return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type,
                                           neg(op_expr[0]->operands[0]));
      }

      /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at
       * its IR level, so we can always apply this transformation.
       */
      if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq)
         return sqrt(op_expr[0]->operands[0]);

      /* As far as we know, all backends are OK with rsq. */
      if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) {
         return rsq(op_expr[0]->operands[0]);
      }

      break;

   case ir_triop_fma:
      /* Operands are op0 * op1 + op2. */
      if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
         /* 0 * x + y == y (and commuted) */
         return ir->operands[2];
      } else if (is_vec_zero(op_const[2])) {
         /* x * y + 0 == x * y */
         return mul(ir->operands[0], ir->operands[1]);
      } else if (is_vec_one(op_const[0])) {
         /* 1 * x + y == x + y */
         return add(ir->operands[1], ir->operands[2]);
      } else if (is_vec_one(op_const[1])) {
         /* x * 1 + y == x + y */
         return add(ir->operands[0], ir->operands[2]);
      }
      break;

   case ir_triop_lrp:
      /* Operands are (x, y, a). */
      if (is_vec_zero(op_const[2])) {
         /* lrp(x, y, 0) == x */
         return ir->operands[0];
      } else if (is_vec_one(op_const[2])) {
         /* lrp(x, y, 1) == y */
         return ir->operands[1];
      } else if (ir->operands[0]->equals(ir->operands[1])) {
         /* lrp(x, x, a) == x */
         return ir->operands[0];
      } else if (is_vec_zero(op_const[0])) {
         /* lrp(0, y, a) == y * a */
         return mul(ir->operands[1], ir->operands[2]);
      } else if (is_vec_zero(op_const[1])) {
         /* lrp(x, 0, a) == x * (1 - a); build the 1 at the right width and
          * base type.
          */
         unsigned op2_components = ir->operands[2]->type->vector_elements;
         ir_constant *one;

         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            one = new(mem_ctx) ir_constant(1.0f, op2_components);
            break;
         case GLSL_TYPE_DOUBLE:
            one = new(mem_ctx) ir_constant(1.0, op2_components);
            break;
         default:
            one = NULL;
            unreachable("unexpected type");
         }

         return mul(ir->operands[0], add(one, neg(ir->operands[2])));
      }
      break;

   case ir_triop_csel:
      /* csel with a constant condition selects one arm outright. */
      if (is_vec_one(op_const[0]))
         return ir->operands[1];
      if (is_vec_zero(op_const[0]))
         return ir->operands[2];
      break;

   default:
      break;
   }

   /* No rewrite applied; hand the expression back untouched. */
   return ir;
}
951
952 void
953 ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
954 {
955 if (!*rvalue)
956 return;
957
958 ir_expression *expr = (*rvalue)->as_expression();
959 if (!expr || expr->operation == ir_quadop_vector)
960 return;
961
962 ir_rvalue *new_rvalue = handle_expression(expr);
963 if (new_rvalue == *rvalue)
964 return;
965
966 /* If the expr used to be some vec OP scalar returning a vector, and the
967 * optimization gave us back a scalar, we still need to turn it into a
968 * vector.
969 */
970 *rvalue = swizzle_if_required(expr, new_rvalue);
971
972 this->progress = true;
973 }
974
975 bool
976 do_algebraic(exec_list *instructions, bool native_integers,
977 const struct gl_shader_compiler_options *options)
978 {
979 ir_algebraic_visitor v(native_integers, options);
980
981 visit_list_elements(&v, instructions);
982
983 return v.progress;
984 }