2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_minmax.cpp
27 * Drop operands from an expression tree of only min/max operations if they
28 * can be proven to not contribute to the final result.
30 * The algorithm is similar to alpha-beta pruning on a minmax search.
34 #include "ir_visitor.h"
35 #include "ir_rvalue_visitor.h"
36 #include "ir_optimization.h"
37 #include "ir_builder.h"
38 #include "program/prog_instruction.h"
39 #include "compiler/glsl_types.h"
40 #include "main/macros.h"
41 #include "util/half_float.h"
43 using namespace ir_builder
;
47 enum compare_components_result
{
58 minmax_range(ir_constant
*low
= NULL
, ir_constant
*high
= NULL
)
64 /* low is the lower limit of the range, high is the higher limit. NULL on
65 * low means negative infinity (unlimited) and on high positive infinity
66 * (unlimited). Because of the two interpretations of the value NULL,
67 * arbitrary comparison between ir_constants is impossible.
73 class ir_minmax_visitor
: public ir_rvalue_enter_visitor
{
80 ir_rvalue
*prune_expression(ir_expression
*expr
, minmax_range baserange
);
82 void handle_rvalue(ir_rvalue
**rvalue
);
88 * Returns LESS if all vector components of `a' are strictly lower than of `b',
89 * GREATER if all vector components of `a' are strictly greater than of `b',
90 * MIXED if some vector components of `a' are strictly lower than of `b' while
91 * others are strictly greater, or EQUAL otherwise.
93 static enum compare_components_result
94 compare_components(ir_constant
*a
, ir_constant
*b
)
99 assert(a
->type
->base_type
== b
->type
->base_type
);
101 unsigned a_inc
= a
->type
->is_scalar() ? 0 : 1;
102 unsigned b_inc
= b
->type
->is_scalar() ? 0 : 1;
103 unsigned components
= MAX2(a
->type
->components(), b
->type
->components());
105 bool foundless
= false;
106 bool foundgreater
= false;
107 bool foundequal
= false;
109 for (unsigned i
= 0, c0
= 0, c1
= 0;
111 c0
+= a_inc
, c1
+= b_inc
, ++i
) {
112 switch (a
->type
->base_type
) {
114 if (a
->value
.u
[c0
] < b
->value
.u
[c1
])
116 else if (a
->value
.u
[c0
] > b
->value
.u
[c1
])
122 if (a
->value
.i
[c0
] < b
->value
.i
[c1
])
124 else if (a
->value
.i
[c0
] > b
->value
.i
[c1
])
129 case GLSL_TYPE_FLOAT16
: {
130 float af
= _mesa_half_to_float(a
->value
.f16
[c0
]);
131 float bf
= _mesa_half_to_float(b
->value
.f16
[c1
]);
140 case GLSL_TYPE_FLOAT
:
141 if (a
->value
.f
[c0
] < b
->value
.f
[c1
])
143 else if (a
->value
.f
[c0
] > b
->value
.f
[c1
])
148 case GLSL_TYPE_DOUBLE
:
149 if (a
->value
.d
[c0
] < b
->value
.d
[c1
])
151 else if (a
->value
.d
[c0
] > b
->value
.d
[c1
])
157 unreachable("not reached");
161 if (foundless
&& foundgreater
) {
162 /* Some components are strictly lower, others are strictly greater */
167 /* It is not mixed, but it is not strictly lower or greater */
169 return LESS_OR_EQUAL
;
171 return GREATER_OR_EQUAL
;
175 /* All components are strictly lower or strictly greater */
176 return foundless
? LESS
: GREATER
;
180 combine_constant(bool ismin
, ir_constant
*a
, ir_constant
*b
)
182 void *mem_ctx
= ralloc_parent(a
);
183 ir_constant
*c
= a
->clone(mem_ctx
, NULL
);
184 for (unsigned i
= 0; i
< c
->type
->components(); i
++) {
185 switch (c
->type
->base_type
) {
187 if ((ismin
&& b
->value
.u
[i
] < c
->value
.u
[i
]) ||
188 (!ismin
&& b
->value
.u
[i
] > c
->value
.u
[i
]))
189 c
->value
.u
[i
] = b
->value
.u
[i
];
192 if ((ismin
&& b
->value
.i
[i
] < c
->value
.i
[i
]) ||
193 (!ismin
&& b
->value
.i
[i
] > c
->value
.i
[i
]))
194 c
->value
.i
[i
] = b
->value
.i
[i
];
196 case GLSL_TYPE_FLOAT16
: {
197 float bf
= _mesa_half_to_float(b
->value
.f16
[i
]);
198 float cf
= _mesa_half_to_float(c
->value
.f16
[i
]);
199 if ((ismin
&& bf
< cf
) || (!ismin
&& bf
> cf
))
200 c
->value
.f16
[i
] = b
->value
.f16
[i
];
203 case GLSL_TYPE_FLOAT
:
204 if ((ismin
&& b
->value
.f
[i
] < c
->value
.f
[i
]) ||
205 (!ismin
&& b
->value
.f
[i
] > c
->value
.f
[i
]))
206 c
->value
.f
[i
] = b
->value
.f
[i
];
208 case GLSL_TYPE_DOUBLE
:
209 if ((ismin
&& b
->value
.d
[i
] < c
->value
.d
[i
]) ||
210 (!ismin
&& b
->value
.d
[i
] > c
->value
.d
[i
]))
211 c
->value
.d
[i
] = b
->value
.d
[i
];
214 assert(!"not reached");
221 smaller_constant(ir_constant
*a
, ir_constant
*b
)
226 enum compare_components_result ret
= compare_components(a
, b
);
228 return combine_constant(true, a
, b
);
229 else if (ret
< EQUAL
)
236 larger_constant(ir_constant
*a
, ir_constant
*b
)
241 enum compare_components_result ret
= compare_components(a
, b
);
243 return combine_constant(false, a
, b
);
244 else if (ret
< EQUAL
)
250 /* Combines two ranges by doing an element-wise min() / max() depending on the
254 combine_range(minmax_range r0
, minmax_range r1
, bool ismin
)
259 ret
.low
= ismin
? r0
.low
: r1
.low
;
260 } else if (!r1
.low
) {
261 ret
.low
= ismin
? r1
.low
: r0
.low
;
263 ret
.low
= ismin
? smaller_constant(r0
.low
, r1
.low
) :
264 larger_constant(r0
.low
, r1
.low
);
268 ret
.high
= ismin
? r1
.high
: r0
.high
;
269 } else if (!r1
.high
) {
270 ret
.high
= ismin
? r0
.high
: r1
.high
;
272 ret
.high
= ismin
? smaller_constant(r0
.high
, r1
.high
) :
273 larger_constant(r0
.high
, r1
.high
);
279 /* Returns a range so that lower limit is the larger of the two lower limits,
280 * and higher limit is the smaller of the two higher limits.
283 range_intersection(minmax_range r0
, minmax_range r1
)
292 ret
.low
= larger_constant(r0
.low
, r1
.low
);
299 ret
.high
= smaller_constant(r0
.high
, r1
.high
);
305 get_range(ir_rvalue
*rval
)
307 ir_expression
*expr
= rval
->as_expression();
308 if (expr
&& (expr
->operation
== ir_binop_min
||
309 expr
->operation
== ir_binop_max
)) {
310 minmax_range r0
= get_range(expr
->operands
[0]);
311 minmax_range r1
= get_range(expr
->operands
[1]);
312 return combine_range(r0
, r1
, expr
->operation
== ir_binop_min
);
315 ir_constant
*c
= rval
->as_constant();
317 return minmax_range(c
, c
);
320 return minmax_range();
324 * Prunes a min/max expression considering the base range of the parent
325 * min/max expression.
327 * @param baserange the range that the parents of this min/max expression
328 * in the min/max tree will clamp its value to.
331 ir_minmax_visitor::prune_expression(ir_expression
*expr
, minmax_range baserange
)
333 assert(expr
->operation
== ir_binop_min
||
334 expr
->operation
== ir_binop_max
);
336 bool ismin
= expr
->operation
== ir_binop_min
;
337 minmax_range limits
[2];
339 /* Recurse to get the ranges for each of the subtrees of this
340 * expression. We need to do this as a separate step because we need to
341 * know the ranges of each of the subtrees before we prune either one.
342 * Consider something like this:
350 * We would like to prune away the max on the bottom-right, but to do so
351 * we need to know the range of the expression on the left beforehand,
352 * and there's no guarantee that we will visit either subtree in a
355 for (unsigned i
= 0; i
< 2; ++i
)
356 limits
[i
] = get_range(expr
->operands
[i
]);
358 for (unsigned i
= 0; i
< 2; ++i
) {
359 bool is_redundant
= false;
361 enum compare_components_result cr
= LESS
;
363 /* If this operand will always be greater than the other one, it's
366 if (limits
[i
].low
&& limits
[1 - i
].high
) {
367 cr
= compare_components(limits
[i
].low
, limits
[1 - i
].high
);
368 if (cr
>= EQUAL
&& cr
!= MIXED
)
371 /* If this operand is always greater than baserange, then even if
372 * it's smaller than the other one it'll get clamped, so it's
375 if (!is_redundant
&& limits
[i
].low
&& baserange
.high
) {
376 cr
= compare_components(limits
[i
].low
, baserange
.high
);
377 if (cr
> EQUAL
&& cr
!= MIXED
)
381 /* If this operand will always be lower than the other one, it's
384 if (limits
[i
].high
&& limits
[1 - i
].low
) {
385 cr
= compare_components(limits
[i
].high
, limits
[1 - i
].low
);
389 /* If this operand is always lower than baserange, then even if
390 * it's greater than the other one it'll get clamped, so it's
393 if (!is_redundant
&& limits
[i
].high
&& baserange
.low
) {
394 cr
= compare_components(limits
[i
].high
, baserange
.low
);
403 /* Recurse if necessary. */
404 ir_expression
*op_expr
= expr
->operands
[1 - i
]->as_expression();
405 if (op_expr
&& (op_expr
->operation
== ir_binop_min
||
406 op_expr
->operation
== ir_binop_max
)) {
407 return prune_expression(op_expr
, baserange
);
410 return expr
->operands
[1 - i
];
411 } else if (cr
== MIXED
) {
412 /* If we have mixed vector operands, we can try to resolve the minmax
413 * expression by doing a component-wise minmax:
422 ir_constant
*a
= expr
->operands
[0]->as_constant();
423 ir_constant
*b
= expr
->operands
[1]->as_constant();
425 return combine_constant(ismin
, a
, b
);
429 /* Now recurse to operands giving them the proper baserange. The baserange
430 * to pass is the intersection of our baserange and the other operand's
431 * limit with one of the ranges unlimited. If we can't compute a valid
432 * intersection, we use the current baserange.
434 for (unsigned i
= 0; i
< 2; ++i
) {
435 ir_expression
*op_expr
= expr
->operands
[i
]->as_expression();
436 if (op_expr
&& (op_expr
->operation
== ir_binop_min
||
437 op_expr
->operation
== ir_binop_max
)) {
438 /* We can only compute a new baserange for this operand if we managed
439 * to compute a valid range for the other operand.
442 limits
[1 - i
].low
= NULL
;
444 limits
[1 - i
].high
= NULL
;
445 minmax_range base
= range_intersection(limits
[1 - i
], baserange
);
446 expr
->operands
[i
] = prune_expression(op_expr
, base
);
450 /* If we got here we could not discard any of the operands of the minmax
451 * expression, but we can still try to resolve the expression if both
452 * operands are constant. We do this after the loop above, to make sure
453 * that if our operands are minmax expressions we have tried to prune them
454 * first (hopefully reducing them to constants).
456 ir_constant
*a
= expr
->operands
[0]->as_constant();
457 ir_constant
*b
= expr
->operands
[1]->as_constant();
459 return combine_constant(ismin
, a
, b
);
465 swizzle_if_required(ir_expression
*expr
, ir_rvalue
*rval
)
467 if (expr
->type
->is_vector() && rval
->type
->is_scalar()) {
468 return swizzle(rval
, SWIZZLE_XXXX
, expr
->type
->vector_elements
);
475 ir_minmax_visitor::handle_rvalue(ir_rvalue
**rvalue
)
480 ir_expression
*expr
= (*rvalue
)->as_expression();
481 if (!expr
|| (expr
->operation
!= ir_binop_min
&&
482 expr
->operation
!= ir_binop_max
))
485 ir_rvalue
*new_rvalue
= prune_expression(expr
, minmax_range());
486 if (new_rvalue
== *rvalue
)
489 /* If the expression type is a vector and the optimization leaves a scalar
490 * as the result, we need to turn it into a vector.
492 *rvalue
= swizzle_if_required(expr
, new_rvalue
);
500 do_minmax_prune(exec_list
*instructions
)
504 visit_list_elements(&v
, instructions
);