2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_minmax.cpp
27 * Drop operands from an expression tree of only min/max operations if they
28 * can be proven to not contribute to the final result.
30 * The algorithm is similar to alpha-beta pruning on a minmax search.
34 #include "ir_visitor.h"
35 #include "ir_rvalue_visitor.h"
36 #include "ir_optimization.h"
37 #include "ir_builder.h"
38 #include "program/prog_instruction.h"
39 #include "glsl_types.h"
40 #include "main/macros.h"
42 using namespace ir_builder
;
46 enum compare_components_result
{
57 minmax_range(ir_constant
*low
= NULL
, ir_constant
*high
= NULL
)
63 /* low is the lower limit of the range, high is the higher limit. NULL on
64 * low means negative infinity (unlimited) and on high positive infinity
65 * (unlimited). Because of the two interpretations of the value NULL,
66 * arbitrary comparison between ir_constants is impossible.
72 class ir_minmax_visitor
: public ir_rvalue_enter_visitor
{
79 ir_rvalue
*prune_expression(ir_expression
*expr
, minmax_range baserange
);
81 void handle_rvalue(ir_rvalue
**rvalue
);
87 * Returns LESS if all vector components of `a' are strictly lower than of `b',
88 * GREATER if all vector components of `a' are strictly greater than of `b',
89 * MIXED if some vector components of `a' are strictly lower than of `b' while
90 * others are strictly greater, or EQUAL otherwise.
92 static enum compare_components_result
93 compare_components(ir_constant
*a
, ir_constant
*b
)
98 assert(a
->type
->base_type
== b
->type
->base_type
);
100 unsigned a_inc
= a
->type
->is_scalar() ? 0 : 1;
101 unsigned b_inc
= b
->type
->is_scalar() ? 0 : 1;
102 unsigned components
= MAX2(a
->type
->components(), b
->type
->components());
104 bool foundless
= false;
105 bool foundgreater
= false;
106 bool foundequal
= false;
108 for (unsigned i
= 0, c0
= 0, c1
= 0;
110 c0
+= a_inc
, c1
+= b_inc
, ++i
) {
111 switch (a
->type
->base_type
) {
113 if (a
->value
.u
[c0
] < b
->value
.u
[c1
])
115 else if (a
->value
.u
[c0
] > b
->value
.u
[c1
])
121 if (a
->value
.i
[c0
] < b
->value
.i
[c1
])
123 else if (a
->value
.i
[c0
] > b
->value
.i
[c1
])
128 case GLSL_TYPE_FLOAT
:
129 if (a
->value
.f
[c0
] < b
->value
.f
[c1
])
131 else if (a
->value
.f
[c0
] > b
->value
.f
[c1
])
137 unreachable("not reached");
141 if (foundless
&& foundgreater
) {
142 /* Some components are strictly lower, others are strictly greater */
147 /* It is not mixed, but it is not strictly lower or greater */
149 return LESS_OR_EQUAL
;
151 return GREATER_OR_EQUAL
;
155 /* All components are strictly lower or strictly greater */
156 return foundless
? LESS
: GREATER
;
160 combine_constant(bool ismin
, ir_constant
*a
, ir_constant
*b
)
162 void *mem_ctx
= ralloc_parent(a
);
163 ir_constant
*c
= a
->clone(mem_ctx
, NULL
);
164 for (unsigned i
= 0; i
< c
->type
->components(); i
++) {
165 switch (c
->type
->base_type
) {
167 if ((ismin
&& b
->value
.u
[i
] < c
->value
.u
[i
]) ||
168 (!ismin
&& b
->value
.u
[i
] > c
->value
.u
[i
]))
169 c
->value
.u
[i
] = b
->value
.u
[i
];
172 if ((ismin
&& b
->value
.i
[i
] < c
->value
.i
[i
]) ||
173 (!ismin
&& b
->value
.i
[i
] > c
->value
.i
[i
]))
174 c
->value
.i
[i
] = b
->value
.i
[i
];
176 case GLSL_TYPE_FLOAT
:
177 if ((ismin
&& b
->value
.f
[i
] < c
->value
.f
[i
]) ||
178 (!ismin
&& b
->value
.f
[i
] > c
->value
.f
[i
]))
179 c
->value
.f
[i
] = b
->value
.f
[i
];
182 assert(!"not reached");
189 smaller_constant(ir_constant
*a
, ir_constant
*b
)
194 enum compare_components_result ret
= compare_components(a
, b
);
196 return combine_constant(true, a
, b
);
197 else if (ret
< EQUAL
)
204 larger_constant(ir_constant
*a
, ir_constant
*b
)
209 enum compare_components_result ret
= compare_components(a
, b
);
211 return combine_constant(false, a
, b
);
212 else if (ret
< EQUAL
)
218 /* Combines two ranges by doing an element-wise min() / max() depending on the
222 combine_range(minmax_range r0
, minmax_range r1
, bool ismin
)
227 ret
.low
= ismin
? r0
.low
: r1
.low
;
228 } else if (!r1
.low
) {
229 ret
.low
= ismin
? r1
.low
: r0
.low
;
231 ret
.low
= ismin
? smaller_constant(r0
.low
, r1
.low
) :
232 larger_constant(r0
.low
, r1
.low
);
236 ret
.high
= ismin
? r1
.high
: r0
.high
;
237 } else if (!r1
.high
) {
238 ret
.high
= ismin
? r0
.high
: r1
.high
;
240 ret
.high
= ismin
? smaller_constant(r0
.high
, r1
.high
) :
241 larger_constant(r0
.high
, r1
.high
);
247 /* Returns a range so that lower limit is the larger of the two lower limits,
248 * and higher limit is the smaller of the two higher limits.
251 range_intersection(minmax_range r0
, minmax_range r1
)
260 ret
.low
= larger_constant(r0
.low
, r1
.low
);
267 ret
.high
= smaller_constant(r0
.high
, r1
.high
);
273 get_range(ir_rvalue
*rval
)
275 ir_expression
*expr
= rval
->as_expression();
276 if (expr
&& (expr
->operation
== ir_binop_min
||
277 expr
->operation
== ir_binop_max
)) {
278 minmax_range r0
= get_range(expr
->operands
[0]);
279 minmax_range r1
= get_range(expr
->operands
[1]);
280 return combine_range(r0
, r1
, expr
->operation
== ir_binop_min
);
283 ir_constant
*c
= rval
->as_constant();
285 return minmax_range(c
, c
);
288 return minmax_range();
292 * Prunes a min/max expression considering the base range of the parent
293 * min/max expression.
295 * @param baserange the range that the parents of this min/max expression
296 * in the min/max tree will clamp its value to.
299 ir_minmax_visitor::prune_expression(ir_expression
*expr
, minmax_range baserange
)
301 assert(expr
->operation
== ir_binop_min
||
302 expr
->operation
== ir_binop_max
);
304 bool ismin
= expr
->operation
== ir_binop_min
;
305 minmax_range limits
[2];
307 /* Recurse to get the ranges for each of the subtrees of this
308 * expression. We need to do this as a separate step because we need to
309 * know the ranges of each of the subtrees before we prune either one.
310 * Consider something like this:
318 * We would like to prune away the max on the bottom-right, but to do so
319 * we need to know the range of the expression on the left beforehand,
320 * and there's no guarantee that we will visit either subtree in a
323 for (unsigned i
= 0; i
< 2; ++i
)
324 limits
[i
] = get_range(expr
->operands
[i
]);
326 for (unsigned i
= 0; i
< 2; ++i
) {
327 bool is_redundant
= false;
329 enum compare_components_result cr
= LESS
;
331 /* If this operand will always be greater than the other one, it's
334 if (limits
[i
].low
&& limits
[1 - i
].high
) {
335 cr
= compare_components(limits
[i
].low
, limits
[1 - i
].high
);
336 if (cr
>= EQUAL
&& cr
!= MIXED
)
339 /* If this operand is always greater than baserange, then even if
340 * it's smaller than the other one it'll get clamped, so it's
343 if (!is_redundant
&& limits
[i
].low
&& baserange
.high
) {
344 cr
= compare_components(limits
[i
].low
, baserange
.high
);
345 if (cr
>= EQUAL
&& cr
!= MIXED
)
349 /* If this operand will always be lower than the other one, it's
352 if (limits
[i
].high
&& limits
[1 - i
].low
) {
353 cr
= compare_components(limits
[i
].high
, limits
[1 - i
].low
);
357 /* If this operand is always lower than baserange, then even if
358 * it's greater than the other one it'll get clamped, so it's
361 if (!is_redundant
&& limits
[i
].high
&& baserange
.low
) {
362 cr
= compare_components(limits
[i
].high
, baserange
.low
);
371 /* Recurse if necessary. */
372 ir_expression
*op_expr
= expr
->operands
[1 - i
]->as_expression();
373 if (op_expr
&& (op_expr
->operation
== ir_binop_min
||
374 op_expr
->operation
== ir_binop_max
)) {
375 return prune_expression(op_expr
, baserange
);
378 return expr
->operands
[1 - i
];
379 } else if (cr
== MIXED
) {
380 /* If we have mixed vector operands, we can try to resolve the minmax
381 * expression by doing a component-wise minmax:
390 ir_constant
*a
= expr
->operands
[0]->as_constant();
391 ir_constant
*b
= expr
->operands
[1]->as_constant();
393 return combine_constant(ismin
, a
, b
);
397 /* Now recurse to operands giving them the proper baserange. The baserange
398 * to pass is the intersection of our baserange and the other operand's
399 * limit with one of the ranges unlimited. If we can't compute a valid
400 * intersection, we use the current baserange.
402 for (unsigned i
= 0; i
< 2; ++i
) {
403 ir_expression
*op_expr
= expr
->operands
[i
]->as_expression();
404 if (op_expr
&& (op_expr
->operation
== ir_binop_min
||
405 op_expr
->operation
== ir_binop_max
)) {
406 /* We can only compute a new baserange for this operand if we managed
407 * to compute a valid range for the other operand.
410 limits
[1 - i
].low
= NULL
;
412 limits
[1 - i
].high
= NULL
;
413 minmax_range base
= range_intersection(limits
[1 - i
], baserange
);
414 expr
->operands
[i
] = prune_expression(op_expr
, base
);
418 /* If we got here we could not discard any of the operands of the minmax
419 * expression, but we can still try to resolve the expression if both
420 * operands are constant. We do this after the loop above, to make sure
421 * that if our operands are minmax expressions we have tried to prune them
422 * first (hopefully reducing them to constants).
424 ir_constant
*a
= expr
->operands
[0]->as_constant();
425 ir_constant
*b
= expr
->operands
[1]->as_constant();
427 return combine_constant(ismin
, a
, b
);
433 swizzle_if_required(ir_expression
*expr
, ir_rvalue
*rval
)
435 if (expr
->type
->is_vector() && rval
->type
->is_scalar()) {
436 return swizzle(rval
, SWIZZLE_XXXX
, expr
->type
->vector_elements
);
443 ir_minmax_visitor::handle_rvalue(ir_rvalue
**rvalue
)
448 ir_expression
*expr
= (*rvalue
)->as_expression();
449 if (!expr
|| (expr
->operation
!= ir_binop_min
&&
450 expr
->operation
!= ir_binop_max
))
453 ir_rvalue
*new_rvalue
= prune_expression(expr
, minmax_range());
454 if (new_rvalue
== *rvalue
)
457 /* If the expression type is a vector and the optimization leaves a scalar
458 * as the result, we need to turn it into a vector.
460 *rvalue
= swizzle_if_required(expr
, new_rvalue
);
468 do_minmax_prune(exec_list
*instructions
)
472 visit_list_elements(&v
, instructions
);