2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_minmax.cpp
27 * Drop operands from an expression tree of only min/max operations if they
28 * can be proven to not contribute to the final result.
30 * The algorithm is similar to alpha-beta pruning on a minmax search.
34 #include "ir_visitor.h"
35 #include "ir_rvalue_visitor.h"
36 #include "ir_optimization.h"
37 #include "ir_builder.h"
38 #include "program/prog_instruction.h"
39 #include "glsl_types.h"
40 #include "main/macros.h"
42 using namespace ir_builder
;
46 enum compare_components_result
{
57 minmax_range(ir_constant
*low
= NULL
, ir_constant
*high
= NULL
)
63 /* low is the lower limit of the range, high is the higher limit. NULL on
64 * low means negative infinity (unlimited) and on high positive infinity
65 * (unlimited). Because of the two interpretations of the value NULL,
66 * arbitrary comparison between ir_constants is impossible.
72 class ir_minmax_visitor
: public ir_rvalue_enter_visitor
{
79 ir_rvalue
*prune_expression(ir_expression
*expr
, minmax_range baserange
);
81 void handle_rvalue(ir_rvalue
**rvalue
);
87 * Returns LESS if all vector components of `a' are strictly lower than of `b',
88 * GREATER if all vector components of `a' are strictly greater than of `b',
89 * MIXED if some vector components of `a' are strictly lower than of `b' while
90 * others are strictly greater, or EQUAL otherwise.
92 static enum compare_components_result
93 compare_components(ir_constant
*a
, ir_constant
*b
)
98 assert(a
->type
->base_type
== b
->type
->base_type
);
100 unsigned a_inc
= a
->type
->is_scalar() ? 0 : 1;
101 unsigned b_inc
= b
->type
->is_scalar() ? 0 : 1;
102 unsigned components
= MAX2(a
->type
->components(), b
->type
->components());
104 bool foundless
= false;
105 bool foundgreater
= false;
106 bool foundequal
= false;
108 for (unsigned i
= 0, c0
= 0, c1
= 0;
110 c0
+= a_inc
, c1
+= b_inc
, ++i
) {
111 switch (a
->type
->base_type
) {
113 if (a
->value
.u
[c0
] < b
->value
.u
[c1
])
115 else if (a
->value
.u
[c0
] > b
->value
.u
[c1
])
121 if (a
->value
.i
[c0
] < b
->value
.i
[c1
])
123 else if (a
->value
.i
[c0
] > b
->value
.i
[c1
])
128 case GLSL_TYPE_FLOAT
:
129 if (a
->value
.f
[c0
] < b
->value
.f
[c1
])
131 else if (a
->value
.f
[c0
] > b
->value
.f
[c1
])
136 case GLSL_TYPE_DOUBLE
:
137 if (a
->value
.d
[c0
] < b
->value
.d
[c1
])
139 else if (a
->value
.d
[c0
] > b
->value
.d
[c1
])
145 unreachable("not reached");
149 if (foundless
&& foundgreater
) {
150 /* Some components are strictly lower, others are strictly greater */
155 /* It is not mixed, but it is not strictly lower or greater */
157 return LESS_OR_EQUAL
;
159 return GREATER_OR_EQUAL
;
163 /* All components are strictly lower or strictly greater */
164 return foundless
? LESS
: GREATER
;
168 combine_constant(bool ismin
, ir_constant
*a
, ir_constant
*b
)
170 void *mem_ctx
= ralloc_parent(a
);
171 ir_constant
*c
= a
->clone(mem_ctx
, NULL
);
172 for (unsigned i
= 0; i
< c
->type
->components(); i
++) {
173 switch (c
->type
->base_type
) {
175 if ((ismin
&& b
->value
.u
[i
] < c
->value
.u
[i
]) ||
176 (!ismin
&& b
->value
.u
[i
] > c
->value
.u
[i
]))
177 c
->value
.u
[i
] = b
->value
.u
[i
];
180 if ((ismin
&& b
->value
.i
[i
] < c
->value
.i
[i
]) ||
181 (!ismin
&& b
->value
.i
[i
] > c
->value
.i
[i
]))
182 c
->value
.i
[i
] = b
->value
.i
[i
];
184 case GLSL_TYPE_FLOAT
:
185 if ((ismin
&& b
->value
.f
[i
] < c
->value
.f
[i
]) ||
186 (!ismin
&& b
->value
.f
[i
] > c
->value
.f
[i
]))
187 c
->value
.f
[i
] = b
->value
.f
[i
];
189 case GLSL_TYPE_DOUBLE
:
190 if ((ismin
&& b
->value
.d
[i
] < c
->value
.d
[i
]) ||
191 (!ismin
&& b
->value
.d
[i
] > c
->value
.d
[i
]))
192 c
->value
.d
[i
] = b
->value
.d
[i
];
195 assert(!"not reached");
202 smaller_constant(ir_constant
*a
, ir_constant
*b
)
207 enum compare_components_result ret
= compare_components(a
, b
);
209 return combine_constant(true, a
, b
);
210 else if (ret
< EQUAL
)
217 larger_constant(ir_constant
*a
, ir_constant
*b
)
222 enum compare_components_result ret
= compare_components(a
, b
);
224 return combine_constant(false, a
, b
);
225 else if (ret
< EQUAL
)
231 /* Combines two ranges by doing an element-wise min() / max() depending on the
235 combine_range(minmax_range r0
, minmax_range r1
, bool ismin
)
240 ret
.low
= ismin
? r0
.low
: r1
.low
;
241 } else if (!r1
.low
) {
242 ret
.low
= ismin
? r1
.low
: r0
.low
;
244 ret
.low
= ismin
? smaller_constant(r0
.low
, r1
.low
) :
245 larger_constant(r0
.low
, r1
.low
);
249 ret
.high
= ismin
? r1
.high
: r0
.high
;
250 } else if (!r1
.high
) {
251 ret
.high
= ismin
? r0
.high
: r1
.high
;
253 ret
.high
= ismin
? smaller_constant(r0
.high
, r1
.high
) :
254 larger_constant(r0
.high
, r1
.high
);
260 /* Returns a range so that lower limit is the larger of the two lower limits,
261 * and higher limit is the smaller of the two higher limits.
264 range_intersection(minmax_range r0
, minmax_range r1
)
273 ret
.low
= larger_constant(r0
.low
, r1
.low
);
280 ret
.high
= smaller_constant(r0
.high
, r1
.high
);
286 get_range(ir_rvalue
*rval
)
288 ir_expression
*expr
= rval
->as_expression();
289 if (expr
&& (expr
->operation
== ir_binop_min
||
290 expr
->operation
== ir_binop_max
)) {
291 minmax_range r0
= get_range(expr
->operands
[0]);
292 minmax_range r1
= get_range(expr
->operands
[1]);
293 return combine_range(r0
, r1
, expr
->operation
== ir_binop_min
);
296 ir_constant
*c
= rval
->as_constant();
298 return minmax_range(c
, c
);
301 return minmax_range();
305 * Prunes a min/max expression considering the base range of the parent
306 * min/max expression.
308 * @param baserange the range that the parents of this min/max expression
309 * in the min/max tree will clamp its value to.
312 ir_minmax_visitor::prune_expression(ir_expression
*expr
, minmax_range baserange
)
314 assert(expr
->operation
== ir_binop_min
||
315 expr
->operation
== ir_binop_max
);
317 bool ismin
= expr
->operation
== ir_binop_min
;
318 minmax_range limits
[2];
320 /* Recurse to get the ranges for each of the subtrees of this
321 * expression. We need to do this as a separate step because we need to
322 * know the ranges of each of the subtrees before we prune either one.
323 * Consider something like this:
331 * We would like to prune away the max on the bottom-right, but to do so
332 * we need to know the range of the expression on the left beforehand,
333 * and there's no guarantee that we will visit either subtree in a
336 for (unsigned i
= 0; i
< 2; ++i
)
337 limits
[i
] = get_range(expr
->operands
[i
]);
339 for (unsigned i
= 0; i
< 2; ++i
) {
340 bool is_redundant
= false;
342 enum compare_components_result cr
= LESS
;
344 /* If this operand will always be greater than the other one, it's
347 if (limits
[i
].low
&& limits
[1 - i
].high
) {
348 cr
= compare_components(limits
[i
].low
, limits
[1 - i
].high
);
349 if (cr
>= EQUAL
&& cr
!= MIXED
)
352 /* If this operand is always greater than baserange, then even if
353 * it's smaller than the other one it'll get clamped, so it's
356 if (!is_redundant
&& limits
[i
].low
&& baserange
.high
) {
357 cr
= compare_components(limits
[i
].low
, baserange
.high
);
358 if (cr
>= EQUAL
&& cr
!= MIXED
)
362 /* If this operand will always be lower than the other one, it's
365 if (limits
[i
].high
&& limits
[1 - i
].low
) {
366 cr
= compare_components(limits
[i
].high
, limits
[1 - i
].low
);
370 /* If this operand is always lower than baserange, then even if
371 * it's greater than the other one it'll get clamped, so it's
374 if (!is_redundant
&& limits
[i
].high
&& baserange
.low
) {
375 cr
= compare_components(limits
[i
].high
, baserange
.low
);
384 /* Recurse if necessary. */
385 ir_expression
*op_expr
= expr
->operands
[1 - i
]->as_expression();
386 if (op_expr
&& (op_expr
->operation
== ir_binop_min
||
387 op_expr
->operation
== ir_binop_max
)) {
388 return prune_expression(op_expr
, baserange
);
391 return expr
->operands
[1 - i
];
392 } else if (cr
== MIXED
) {
393 /* If we have mixed vector operands, we can try to resolve the minmax
394 * expression by doing a component-wise minmax:
403 ir_constant
*a
= expr
->operands
[0]->as_constant();
404 ir_constant
*b
= expr
->operands
[1]->as_constant();
406 return combine_constant(ismin
, a
, b
);
410 /* Now recurse to operands giving them the proper baserange. The baserange
411 * to pass is the intersection of our baserange and the other operand's
412 * limit with one of the ranges unlimited. If we can't compute a valid
413 * intersection, we use the current baserange.
415 for (unsigned i
= 0; i
< 2; ++i
) {
416 ir_expression
*op_expr
= expr
->operands
[i
]->as_expression();
417 if (op_expr
&& (op_expr
->operation
== ir_binop_min
||
418 op_expr
->operation
== ir_binop_max
)) {
419 /* We can only compute a new baserange for this operand if we managed
420 * to compute a valid range for the other operand.
423 limits
[1 - i
].low
= NULL
;
425 limits
[1 - i
].high
= NULL
;
426 minmax_range base
= range_intersection(limits
[1 - i
], baserange
);
427 expr
->operands
[i
] = prune_expression(op_expr
, base
);
431 /* If we got here we could not discard any of the operands of the minmax
432 * expression, but we can still try to resolve the expression if both
433 * operands are constant. We do this after the loop above, to make sure
434 * that if our operands are minmax expressions we have tried to prune them
435 * first (hopefully reducing them to constants).
437 ir_constant
*a
= expr
->operands
[0]->as_constant();
438 ir_constant
*b
= expr
->operands
[1]->as_constant();
440 return combine_constant(ismin
, a
, b
);
446 swizzle_if_required(ir_expression
*expr
, ir_rvalue
*rval
)
448 if (expr
->type
->is_vector() && rval
->type
->is_scalar()) {
449 return swizzle(rval
, SWIZZLE_XXXX
, expr
->type
->vector_elements
);
456 ir_minmax_visitor::handle_rvalue(ir_rvalue
**rvalue
)
461 ir_expression
*expr
= (*rvalue
)->as_expression();
462 if (!expr
|| (expr
->operation
!= ir_binop_min
&&
463 expr
->operation
!= ir_binop_max
))
466 ir_rvalue
*new_rvalue
= prune_expression(expr
, minmax_range());
467 if (new_rvalue
== *rvalue
)
470 /* If the expression type is a vector and the optimization leaves a scalar
471 * as the result, we need to turn it into a vector.
473 *rvalue
= swizzle_if_required(expr
, new_rvalue
);
481 do_minmax_prune(exec_list
*instructions
)
485 visit_list_elements(&v
, instructions
);