2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_rebalance_tree.cpp
27 * Rebalances a reduction expression tree.
29 * For reduction operations (e.g., x + y + z + w) we generate an expression
40 * which we can rebalance into
49 * to get a better instruction scheduling.
51 * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
52 * and Bette L. Warren.
54 * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
55 * explanation of the of the tree_to_vine() (rightward rotation) and
56 * vine_to_tree() (leftward rotation) algorithms.
60 #include "ir_visitor.h"
61 #include "ir_rvalue_visitor.h"
62 #include "ir_optimization.h"
63 #include "main/macros.h" /* for MAX2 */
65 /* The DSW algorithm generates a degenerate tree (really, a linked list) in
66 * tree_to_vine(). We'd rather not leave a binary expression with only one
67 * operand, so trivial modifications (the ternary operators below) are needed
68 * to ensure that we only rotate around the ir_expression nodes of the tree.
71 tree_to_vine(ir_expression
*root
)
74 ir_rvalue
*vine_tail
= root
;
75 ir_rvalue
*remainder
= root
->operands
[1];
77 while (remainder
!= NULL
) {
78 ir_expression
*remainder_temp
= remainder
->as_expression();
79 ir_expression
*remainder_left
= remainder_temp
?
80 remainder_temp
->operands
[0]->as_expression() : NULL
;
82 if (remainder_left
== NULL
) {
83 /* move vine_tail down one */
84 vine_tail
= remainder
;
85 remainder
= remainder
->as_expression() ?
86 ((ir_expression
*)remainder
)->operands
[1] : NULL
;
90 ir_expression
*tempptr
= remainder_left
;
91 ((ir_expression
*)remainder
)->operands
[0] = tempptr
->operands
[1];
92 tempptr
->operands
[1] = remainder
;
94 ((ir_expression
*)vine_tail
)->operands
[1] = tempptr
;
102 compression(ir_expression
*root
, unsigned count
)
104 ir_expression
*scanner
= root
;
106 for (unsigned i
= 0; i
< count
; i
++) {
107 ir_expression
*child
= (ir_expression
*)scanner
->operands
[1];
108 scanner
->operands
[1] = child
->operands
[1];
109 scanner
= (ir_expression
*)scanner
->operands
[1];
110 child
->operands
[1] = scanner
->operands
[0];
111 scanner
->operands
[0] = child
;
116 vine_to_tree(ir_expression
*root
, unsigned size
)
119 for (int m
= n
/ 2; m
> 0; m
= n
/ 2) {
120 compression(root
, m
);
127 class ir_rebalance_visitor
: public ir_rvalue_enter_visitor
{
129 ir_rebalance_visitor()
134 void handle_rvalue(ir_rvalue
**rvalue
);
139 struct is_reduction_data
{
140 ir_expression_operation operation
;
141 const glsl_type
*type
;
144 bool contains_constant
;
147 } /* anonymous namespace */
150 is_reduction_operation(ir_expression_operation operation
)
155 case ir_binop_bit_and
:
156 case ir_binop_bit_xor
:
157 case ir_binop_bit_or
:
158 case ir_binop_logic_and
:
159 case ir_binop_logic_xor
:
160 case ir_binop_logic_or
:
169 /* Note that this function does not attempt to recognize that reduction trees
170 * are already balanced.
172 * We return false from this function for a number of reasons other than an
173 * expression tree not being a mathematical reduction. Namely,
175 * - if the tree contains multiple constants that we may be able to combine.
176 * - if the tree contains matrices:
177 * - they might contain vec4's with many constant components that we can
178 * simplify after splitting.
179 * - applying the matrix chain ordering optimization is more than just
180 * balancing an expression tree.
181 * - if the tree contains operations on multiple types.
182 * - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
183 * would trick the visiting pass.
186 is_reduction(ir_instruction
*ir
, void *data
)
188 struct is_reduction_data
*ird
= (struct is_reduction_data
*)data
;
189 if (!ird
->is_reduction
)
192 /* We don't want to balance a tree that contains multiple constants, since
193 * we'll be able to constant fold them if they're not in separate subtrees.
195 if (ir
->as_constant()) {
196 if (ird
->contains_constant
) {
197 ird
->is_reduction
= false;
199 ird
->contains_constant
= true;
203 /* Array/record dereferences have subtrees that are not part of the expr
204 * tree we're balancing. Skip trees containing them.
206 if (ir
->ir_type
== ir_type_dereference_array
||
207 ir
->ir_type
== ir_type_dereference_record
) {
208 ird
->is_reduction
= false;
212 ir_expression
*expr
= ir
->as_expression();
216 /* Non-constant matrices might still contain constant vec4 that we can
217 * constant fold once split up. Handling matrices will need some more
220 if (expr
->type
->is_matrix() ||
221 expr
->operands
[0]->type
->is_matrix() ||
222 (expr
->operands
[1] && expr
->operands
[1]->type
->is_matrix())) {
223 ird
->is_reduction
= false;
227 if (ird
->type
!= NULL
&& ird
->type
!= expr
->type
) {
228 ird
->is_reduction
= false;
231 ird
->type
= expr
->type
;
234 if (is_reduction_operation(expr
->operation
)) {
235 if (ird
->operation
!= 0 && ird
->operation
!= expr
->operation
)
236 ird
->is_reduction
= false;
237 ird
->operation
= expr
->operation
;
239 ird
->is_reduction
= false;
244 handle_expression(ir_expression
*expr
)
246 struct is_reduction_data ird
;
247 ird
.operation
= (ir_expression_operation
)0;
250 ird
.is_reduction
= true;
251 ird
.contains_constant
= false;
253 visit_tree(expr
, is_reduction
, (void *)&ird
);
255 if (ird
.is_reduction
&& ird
.num_expr
> 2) {
256 ir_constant z
= ir_constant(0.0f
);
257 ir_expression pseudo_root
= ir_expression(ir_binop_add
, &z
, expr
);
259 unsigned size
= tree_to_vine(&pseudo_root
);
260 vine_to_tree(&pseudo_root
, size
);
262 expr
= (ir_expression
*)pseudo_root
.operands
[1];
268 update_types(ir_instruction
*ir
, void *)
270 ir_expression
*expr
= ir
->as_expression();
275 glsl_type::get_instance(expr
->type
->base_type
,
276 MAX2(expr
->operands
[0]->type
->components(),
277 expr
->operands
[1]->type
->components()),
282 ir_rebalance_visitor::handle_rvalue(ir_rvalue
**rvalue
)
287 ir_expression
*expr
= (*rvalue
)->as_expression();
288 if (!expr
|| !is_reduction_operation(expr
->operation
))
291 ir_rvalue
*new_rvalue
= handle_expression(expr
);
293 /* If we failed to rebalance the tree (e.g., because it wasn't a reduction,
294 * or some other set of cases) new_rvalue will point to the same root as
297 * Similarly, if the tree rooted at *rvalue was a reduction and was already
298 * balanced, the algorithm will rearrange the tree but will ultimately
299 * return an identical tree, so this check will handle that as well and
300 * will not set progress = true.
302 if (new_rvalue
== *rvalue
)
305 visit_tree(new_rvalue
, NULL
, NULL
, update_types
);
307 *rvalue
= new_rvalue
;
308 this->progress
= true;
312 do_rebalance_tree(exec_list
*instructions
)
314 ir_rebalance_visitor v
;