2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_minmax.cpp
27 * Drop operands from an expression tree of only min/max operations if they
28 * can be proven to not contribute to the final result.
30 * The algorithm is similar to alpha-beta pruning on a minmax search.
34 #include "ir_visitor.h"
35 #include "ir_rvalue_visitor.h"
36 #include "ir_optimization.h"
37 #include "ir_builder.h"
38 #include "program/prog_instruction.h"
39 #include "compiler/glsl_types.h"
40 #include "main/macros.h"
41 #include "util/half_float.h"
43 using namespace ir_builder
;
47 enum compare_components_result
{
58 minmax_range(ir_constant
*low
= NULL
, ir_constant
*high
= NULL
)
64 /* low is the lower limit of the range, high is the higher limit. NULL on
65 * low means negative infinity (unlimited) and on high positive infinity
66 * (unlimited). Because of the two interpretations of the value NULL,
67 * arbitrary comparison between ir_constants is impossible.
73 class ir_minmax_visitor
: public ir_rvalue_enter_visitor
{
80 ir_rvalue
*prune_expression(ir_expression
*expr
, minmax_range baserange
);
82 void handle_rvalue(ir_rvalue
**rvalue
);
88 * Returns LESS if all vector components of `a' are strictly lower than of `b',
89 * GREATER if all vector components of `a' are strictly greater than of `b',
90 * MIXED if some vector components of `a' are strictly lower than of `b' while
91 * others are strictly greater, or EQUAL otherwise.
93 static enum compare_components_result
94 compare_components(ir_constant
*a
, ir_constant
*b
)
99 assert(a
->type
->base_type
== b
->type
->base_type
);
101 unsigned a_inc
= a
->type
->is_scalar() ? 0 : 1;
102 unsigned b_inc
= b
->type
->is_scalar() ? 0 : 1;
103 unsigned components
= MAX2(a
->type
->components(), b
->type
->components());
105 bool foundless
= false;
106 bool foundgreater
= false;
107 bool foundequal
= false;
109 for (unsigned i
= 0, c0
= 0, c1
= 0;
111 c0
+= a_inc
, c1
+= b_inc
, ++i
) {
112 switch (a
->type
->base_type
) {
113 case GLSL_TYPE_UINT16
:
114 if (a
->value
.u16
[c0
] < b
->value
.u16
[c1
])
116 else if (a
->value
.u16
[c0
] > b
->value
.u16
[c1
])
121 case GLSL_TYPE_INT16
:
122 if (a
->value
.i16
[c0
] < b
->value
.i16
[c1
])
124 else if (a
->value
.i16
[c0
] > b
->value
.i16
[c1
])
130 if (a
->value
.u
[c0
] < b
->value
.u
[c1
])
132 else if (a
->value
.u
[c0
] > b
->value
.u
[c1
])
138 if (a
->value
.i
[c0
] < b
->value
.i
[c1
])
140 else if (a
->value
.i
[c0
] > b
->value
.i
[c1
])
145 case GLSL_TYPE_FLOAT16
: {
146 float af
= _mesa_half_to_float(a
->value
.f16
[c0
]);
147 float bf
= _mesa_half_to_float(b
->value
.f16
[c1
]);
156 case GLSL_TYPE_FLOAT
:
157 if (a
->value
.f
[c0
] < b
->value
.f
[c1
])
159 else if (a
->value
.f
[c0
] > b
->value
.f
[c1
])
164 case GLSL_TYPE_DOUBLE
:
165 if (a
->value
.d
[c0
] < b
->value
.d
[c1
])
167 else if (a
->value
.d
[c0
] > b
->value
.d
[c1
])
173 unreachable("not reached");
177 if (foundless
&& foundgreater
) {
178 /* Some components are strictly lower, others are strictly greater */
183 /* It is not mixed, but it is not strictly lower or greater */
185 return LESS_OR_EQUAL
;
187 return GREATER_OR_EQUAL
;
191 /* All components are strictly lower or strictly greater */
192 return foundless
? LESS
: GREATER
;
196 combine_constant(bool ismin
, ir_constant
*a
, ir_constant
*b
)
198 void *mem_ctx
= ralloc_parent(a
);
199 ir_constant
*c
= a
->clone(mem_ctx
, NULL
);
200 for (unsigned i
= 0; i
< c
->type
->components(); i
++) {
201 switch (c
->type
->base_type
) {
202 case GLSL_TYPE_UINT16
:
203 if ((ismin
&& b
->value
.u16
[i
] < c
->value
.u16
[i
]) ||
204 (!ismin
&& b
->value
.u16
[i
] > c
->value
.u16
[i
]))
205 c
->value
.u16
[i
] = b
->value
.u16
[i
];
207 case GLSL_TYPE_INT16
:
208 if ((ismin
&& b
->value
.i16
[i
] < c
->value
.i16
[i
]) ||
209 (!ismin
&& b
->value
.i16
[i
] > c
->value
.i16
[i
]))
210 c
->value
.i16
[i
] = b
->value
.i16
[i
];
213 if ((ismin
&& b
->value
.u
[i
] < c
->value
.u
[i
]) ||
214 (!ismin
&& b
->value
.u
[i
] > c
->value
.u
[i
]))
215 c
->value
.u
[i
] = b
->value
.u
[i
];
218 if ((ismin
&& b
->value
.i
[i
] < c
->value
.i
[i
]) ||
219 (!ismin
&& b
->value
.i
[i
] > c
->value
.i
[i
]))
220 c
->value
.i
[i
] = b
->value
.i
[i
];
222 case GLSL_TYPE_FLOAT16
: {
223 float bf
= _mesa_half_to_float(b
->value
.f16
[i
]);
224 float cf
= _mesa_half_to_float(c
->value
.f16
[i
]);
225 if ((ismin
&& bf
< cf
) || (!ismin
&& bf
> cf
))
226 c
->value
.f16
[i
] = b
->value
.f16
[i
];
229 case GLSL_TYPE_FLOAT
:
230 if ((ismin
&& b
->value
.f
[i
] < c
->value
.f
[i
]) ||
231 (!ismin
&& b
->value
.f
[i
] > c
->value
.f
[i
]))
232 c
->value
.f
[i
] = b
->value
.f
[i
];
234 case GLSL_TYPE_DOUBLE
:
235 if ((ismin
&& b
->value
.d
[i
] < c
->value
.d
[i
]) ||
236 (!ismin
&& b
->value
.d
[i
] > c
->value
.d
[i
]))
237 c
->value
.d
[i
] = b
->value
.d
[i
];
240 assert(!"not reached");
247 smaller_constant(ir_constant
*a
, ir_constant
*b
)
252 enum compare_components_result ret
= compare_components(a
, b
);
254 return combine_constant(true, a
, b
);
255 else if (ret
< EQUAL
)
262 larger_constant(ir_constant
*a
, ir_constant
*b
)
267 enum compare_components_result ret
= compare_components(a
, b
);
269 return combine_constant(false, a
, b
);
270 else if (ret
< EQUAL
)
276 /* Combines two ranges by doing an element-wise min() / max() depending on the
280 combine_range(minmax_range r0
, minmax_range r1
, bool ismin
)
285 ret
.low
= ismin
? r0
.low
: r1
.low
;
286 } else if (!r1
.low
) {
287 ret
.low
= ismin
? r1
.low
: r0
.low
;
289 ret
.low
= ismin
? smaller_constant(r0
.low
, r1
.low
) :
290 larger_constant(r0
.low
, r1
.low
);
294 ret
.high
= ismin
? r1
.high
: r0
.high
;
295 } else if (!r1
.high
) {
296 ret
.high
= ismin
? r0
.high
: r1
.high
;
298 ret
.high
= ismin
? smaller_constant(r0
.high
, r1
.high
) :
299 larger_constant(r0
.high
, r1
.high
);
305 /* Returns a range so that lower limit is the larger of the two lower limits,
306 * and higher limit is the smaller of the two higher limits.
309 range_intersection(minmax_range r0
, minmax_range r1
)
318 ret
.low
= larger_constant(r0
.low
, r1
.low
);
325 ret
.high
= smaller_constant(r0
.high
, r1
.high
);
331 get_range(ir_rvalue
*rval
)
333 ir_expression
*expr
= rval
->as_expression();
334 if (expr
&& (expr
->operation
== ir_binop_min
||
335 expr
->operation
== ir_binop_max
)) {
336 minmax_range r0
= get_range(expr
->operands
[0]);
337 minmax_range r1
= get_range(expr
->operands
[1]);
338 return combine_range(r0
, r1
, expr
->operation
== ir_binop_min
);
341 ir_constant
*c
= rval
->as_constant();
343 return minmax_range(c
, c
);
346 return minmax_range();
350 * Prunes a min/max expression considering the base range of the parent
351 * min/max expression.
353 * @param baserange the range that the parents of this min/max expression
354 * in the min/max tree will clamp its value to.
357 ir_minmax_visitor::prune_expression(ir_expression
*expr
, minmax_range baserange
)
359 assert(expr
->operation
== ir_binop_min
||
360 expr
->operation
== ir_binop_max
);
362 bool ismin
= expr
->operation
== ir_binop_min
;
363 minmax_range limits
[2];
365 /* Recurse to get the ranges for each of the subtrees of this
366 * expression. We need to do this as a separate step because we need to
367 * know the ranges of each of the subtrees before we prune either one.
368 * Consider something like this:
376 * We would like to prune away the max on the bottom-right, but to do so
377 * we need to know the range of the expression on the left beforehand,
378 * and there's no guarantee that we will visit either subtree in a
381 for (unsigned i
= 0; i
< 2; ++i
)
382 limits
[i
] = get_range(expr
->operands
[i
]);
384 for (unsigned i
= 0; i
< 2; ++i
) {
385 bool is_redundant
= false;
387 enum compare_components_result cr
= LESS
;
389 /* If this operand will always be greater than the other one, it's
392 if (limits
[i
].low
&& limits
[1 - i
].high
) {
393 cr
= compare_components(limits
[i
].low
, limits
[1 - i
].high
);
394 if (cr
>= EQUAL
&& cr
!= MIXED
)
397 /* If this operand is always greater than baserange, then even if
398 * it's smaller than the other one it'll get clamped, so it's
401 if (!is_redundant
&& limits
[i
].low
&& baserange
.high
) {
402 cr
= compare_components(limits
[i
].low
, baserange
.high
);
403 if (cr
> EQUAL
&& cr
!= MIXED
)
407 /* If this operand will always be lower than the other one, it's
410 if (limits
[i
].high
&& limits
[1 - i
].low
) {
411 cr
= compare_components(limits
[i
].high
, limits
[1 - i
].low
);
415 /* If this operand is always lower than baserange, then even if
416 * it's greater than the other one it'll get clamped, so it's
419 if (!is_redundant
&& limits
[i
].high
&& baserange
.low
) {
420 cr
= compare_components(limits
[i
].high
, baserange
.low
);
429 /* Recurse if necessary. */
430 ir_expression
*op_expr
= expr
->operands
[1 - i
]->as_expression();
431 if (op_expr
&& (op_expr
->operation
== ir_binop_min
||
432 op_expr
->operation
== ir_binop_max
)) {
433 return prune_expression(op_expr
, baserange
);
436 return expr
->operands
[1 - i
];
437 } else if (cr
== MIXED
) {
438 /* If we have mixed vector operands, we can try to resolve the minmax
439 * expression by doing a component-wise minmax:
448 ir_constant
*a
= expr
->operands
[0]->as_constant();
449 ir_constant
*b
= expr
->operands
[1]->as_constant();
451 return combine_constant(ismin
, a
, b
);
455 /* Now recurse to operands giving them the proper baserange. The baserange
456 * to pass is the intersection of our baserange and the other operand's
457 * limit with one of the ranges unlimited. If we can't compute a valid
458 * intersection, we use the current baserange.
460 for (unsigned i
= 0; i
< 2; ++i
) {
461 ir_expression
*op_expr
= expr
->operands
[i
]->as_expression();
462 if (op_expr
&& (op_expr
->operation
== ir_binop_min
||
463 op_expr
->operation
== ir_binop_max
)) {
464 /* We can only compute a new baserange for this operand if we managed
465 * to compute a valid range for the other operand.
468 limits
[1 - i
].low
= NULL
;
470 limits
[1 - i
].high
= NULL
;
471 minmax_range base
= range_intersection(limits
[1 - i
], baserange
);
472 expr
->operands
[i
] = prune_expression(op_expr
, base
);
476 /* If we got here we could not discard any of the operands of the minmax
477 * expression, but we can still try to resolve the expression if both
478 * operands are constant. We do this after the loop above, to make sure
479 * that if our operands are minmax expressions we have tried to prune them
480 * first (hopefully reducing them to constants).
482 ir_constant
*a
= expr
->operands
[0]->as_constant();
483 ir_constant
*b
= expr
->operands
[1]->as_constant();
485 return combine_constant(ismin
, a
, b
);
491 swizzle_if_required(ir_expression
*expr
, ir_rvalue
*rval
)
493 if (expr
->type
->is_vector() && rval
->type
->is_scalar()) {
494 return swizzle(rval
, SWIZZLE_XXXX
, expr
->type
->vector_elements
);
501 ir_minmax_visitor::handle_rvalue(ir_rvalue
**rvalue
)
506 ir_expression
*expr
= (*rvalue
)->as_expression();
507 if (!expr
|| (expr
->operation
!= ir_binop_min
&&
508 expr
->operation
!= ir_binop_max
))
511 ir_rvalue
*new_rvalue
= prune_expression(expr
, minmax_range());
512 if (new_rvalue
== *rvalue
)
515 /* If the expression type is a vector and the optimization leaves a scalar
516 * as the result, we need to turn it into a vector.
518 *rvalue
= swizzle_if_required(expr
, new_rvalue
);
526 do_minmax_prune(exec_list
*instructions
)
530 visit_list_elements(&v
, instructions
);