2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_rebalance_tree.cpp
27 * Rebalances a reduction expression tree.
29 * For reduction operations (e.g., x + y + z + w) we generate an expression
40 * which we can rebalance into
49 * to get a better instruction scheduling.
51 * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
52 * and Bette L. Warren.
54 * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
55 * explanation of the of the tree_to_vine() (rightward rotation) and
56 * vine_to_tree() (leftward rotation) algorithms.
60 #include "ir_visitor.h"
61 #include "ir_rvalue_visitor.h"
62 #include "ir_optimization.h"
63 #include "main/macros.h" /* for MAX2 */
65 /* The DSW algorithm generates a degenerate tree (really, a linked list) in
66 * tree_to_vine(). We'd rather not leave a binary expression with only one
67 * operand, so trivial modifications (the ternary operators below) are needed
68 * to ensure that we only rotate around the ir_expression nodes of the tree.
71 tree_to_vine(ir_expression
*root
)
74 ir_rvalue
*vine_tail
= root
;
75 ir_rvalue
*remainder
= root
->operands
[1];
77 while (remainder
!= NULL
) {
78 ir_expression
*remainder_temp
= remainder
->as_expression();
79 ir_expression
*remainder_left
= remainder_temp
?
80 remainder_temp
->operands
[0]->as_expression() : NULL
;
82 if (remainder_left
== NULL
) {
83 /* move vine_tail down one */
84 vine_tail
= remainder
;
85 remainder
= remainder
->as_expression() ?
86 ((ir_expression
*)remainder
)->operands
[1] : NULL
;
90 ir_expression
*tempptr
= remainder_left
;
91 ((ir_expression
*)remainder
)->operands
[0] = tempptr
->operands
[1];
92 tempptr
->operands
[1] = remainder
;
94 ((ir_expression
*)vine_tail
)->operands
[1] = tempptr
;
102 compression(ir_expression
*root
, unsigned count
)
104 ir_expression
*scanner
= root
;
106 for (unsigned i
= 0; i
< count
; i
++) {
107 ir_expression
*child
= (ir_expression
*)scanner
->operands
[1];
108 scanner
->operands
[1] = child
->operands
[1];
109 scanner
= (ir_expression
*)scanner
->operands
[1];
110 child
->operands
[1] = scanner
->operands
[0];
111 scanner
->operands
[0] = child
;
116 vine_to_tree(ir_expression
*root
, unsigned size
)
119 for (int m
= n
/ 2; m
> 0; m
= n
/ 2) {
120 compression(root
, m
);
127 class ir_rebalance_visitor
: public ir_rvalue_enter_visitor
{
129 ir_rebalance_visitor()
134 virtual ir_visitor_status
visit_enter(ir_assignment
*ir
);
136 void handle_rvalue(ir_rvalue
**rvalue
);
141 struct is_reduction_data
{
142 ir_expression_operation operation
;
143 const glsl_type
*type
;
146 bool contains_constant
;
149 } /* anonymous namespace */
152 ir_rebalance_visitor::visit_enter(ir_assignment
*ir
)
154 ir_variable
*var
= ir
->lhs
->variable_referenced();
155 if (var
->data
.invariant
|| var
->data
.precise
) {
156 /* If we're assigning to an invariant variable, just bail. Tree
157 * rebalancing (reassociation) isn't precision-safe.
159 return visit_continue_with_parent
;
161 return visit_continue
;
166 is_reduction_operation(ir_expression_operation operation
)
171 case ir_binop_bit_and
:
172 case ir_binop_bit_xor
:
173 case ir_binop_bit_or
:
174 case ir_binop_logic_and
:
175 case ir_binop_logic_xor
:
176 case ir_binop_logic_or
:
185 /* Note that this function does not attempt to recognize that reduction trees
186 * are already balanced.
188 * We return false from this function for a number of reasons other than an
189 * expression tree not being a mathematical reduction. Namely,
191 * - if the tree contains multiple constants that we may be able to combine.
192 * - if the tree contains matrices:
193 * - they might contain vec4's with many constant components that we can
194 * simplify after splitting.
195 * - applying the matrix chain ordering optimization is more than just
196 * balancing an expression tree.
197 * - if the tree contains operations on multiple types.
198 * - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
199 * would trick the visiting pass.
202 is_reduction(ir_instruction
*ir
, void *data
)
204 struct is_reduction_data
*ird
= (struct is_reduction_data
*)data
;
205 if (!ird
->is_reduction
)
208 /* We don't want to balance a tree that contains multiple constants, since
209 * we'll be able to constant fold them if they're not in separate subtrees.
211 if (ir
->as_constant()) {
212 if (ird
->contains_constant
) {
213 ird
->is_reduction
= false;
215 ird
->contains_constant
= true;
219 /* Array/record dereferences have subtrees that are not part of the expr
220 * tree we're balancing. Skip trees containing them.
222 if (ir
->ir_type
== ir_type_dereference_array
||
223 ir
->ir_type
== ir_type_dereference_record
) {
224 ird
->is_reduction
= false;
228 ir_expression
*expr
= ir
->as_expression();
232 /* Non-constant matrices might still contain constant vec4 that we can
233 * constant fold once split up. Handling matrices will need some more
236 if (expr
->type
->is_matrix() ||
237 expr
->operands
[0]->type
->is_matrix() ||
238 (expr
->operands
[1] && expr
->operands
[1]->type
->is_matrix())) {
239 ird
->is_reduction
= false;
243 if (ird
->type
!= NULL
&& ird
->type
!= expr
->type
) {
244 ird
->is_reduction
= false;
247 ird
->type
= expr
->type
;
250 if (is_reduction_operation(expr
->operation
)) {
251 if (ird
->operation
!= 0 && ird
->operation
!= expr
->operation
)
252 ird
->is_reduction
= false;
253 ird
->operation
= expr
->operation
;
255 ird
->is_reduction
= false;
260 handle_expression(ir_expression
*expr
)
262 struct is_reduction_data ird
;
263 ird
.operation
= (ir_expression_operation
)0;
266 ird
.is_reduction
= true;
267 ird
.contains_constant
= false;
269 visit_tree(expr
, is_reduction
, (void *)&ird
);
271 if (ird
.is_reduction
&& ird
.num_expr
> 2) {
272 ir_constant z
= ir_constant(0.0f
);
273 ir_expression pseudo_root
= ir_expression(ir_binop_add
, &z
, expr
);
275 unsigned size
= tree_to_vine(&pseudo_root
);
276 vine_to_tree(&pseudo_root
, size
);
278 expr
= (ir_expression
*)pseudo_root
.operands
[1];
284 update_types(ir_instruction
*ir
, void *)
286 ir_expression
*expr
= ir
->as_expression();
290 const glsl_type
*const new_type
=
291 glsl_type::get_instance(expr
->type
->base_type
,
292 MAX2(expr
->operands
[0]->type
->vector_elements
,
293 expr
->operands
[1]->type
->vector_elements
),
295 assert(new_type
!= glsl_type::error_type
);
296 expr
->type
= new_type
;
300 ir_rebalance_visitor::handle_rvalue(ir_rvalue
**rvalue
)
305 ir_expression
*expr
= (*rvalue
)->as_expression();
306 if (!expr
|| !is_reduction_operation(expr
->operation
))
309 ir_rvalue
*new_rvalue
= handle_expression(expr
);
311 /* If we failed to rebalance the tree (e.g., because it wasn't a reduction,
312 * or some other set of cases) new_rvalue will point to the same root as
315 * Similarly, if the tree rooted at *rvalue was a reduction and was already
316 * balanced, the algorithm will rearrange the tree but will ultimately
317 * return an identical tree, so this check will handle that as well and
318 * will not set progress = true.
320 if (new_rvalue
== *rvalue
)
323 visit_tree(new_rvalue
, NULL
, NULL
, update_types
);
325 *rvalue
= new_rvalue
;
326 this->progress
= true;
330 do_rebalance_tree(exec_list
*instructions
)
332 ir_rebalance_visitor v
;