/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file opt_rebalance_tree.cpp
 *
 * Rebalances a reduction expression tree.
 *
 * For reduction operations (e.g., x + y + z + w) we generate an expression
 * tree like
 *
 *          +
 *         / \
 *        +   w
 *       / \
 *      +   z
 *     / \
 *    x   y
 *
 * which we can rebalance into
 *
 *             +
 *            / \
 *           /   \
 *          +     +
 *         / \   / \
 *        x   y z   w
 *
 * to get better instruction scheduling.
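 *
 * As an intermediate step, tree_to_vine() first flattens the tree into a
 * right-leaning "vine", x + (y + (z + w)):
 *
 *    +
 *   / \
 *  x   +
 *     / \
 *    y   +
 *       / \
 *      z   w
 *
 * which vine_to_tree() then rotates back into the balanced tree shown above.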
 *
 * See "Tree Rebalancing in Optimal Time and Space" by Quentin F. Stout
 * and Bette L. Warren.
 *
 * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
 * explanation of the tree_to_vine() (rightward rotation) and
 * vine_to_tree() (leftward rotation) algorithms.
 */

#include "ir.h"
#include "ir_visitor.h"
#include "ir_rvalue_visitor.h"
#include "ir_optimization.h"
#include "main/macros.h" /* for MAX2 */

/* The DSW algorithm generates a degenerate tree (really, a linked list) in
 * tree_to_vine(). We'd rather not leave a binary expression with only one
 * operand, so trivial modifications (the ternary operators below) are needed
 * to ensure that we only rotate around the ir_expression nodes of the tree.
 */
static unsigned
tree_to_vine(ir_expression *root)
{
   unsigned size = 0;
   ir_rvalue *vine_tail = root;
   ir_rvalue *remainder = root->operands[1];

   while (remainder != NULL) {
      ir_expression *remainder_temp = remainder->as_expression();
      ir_expression *remainder_left = remainder_temp ?
         remainder_temp->operands[0]->as_expression() : NULL;

      if (remainder_left == NULL) {
         /* move vine_tail down one */
         vine_tail = remainder;
         remainder = remainder->as_expression() ?
            ((ir_expression *)remainder)->operands[1] : NULL;
         size++;
      } else {
         /* rotate */
         ir_expression *tempptr = remainder_left;
         ((ir_expression *)remainder)->operands[0] = tempptr->operands[1];
         tempptr->operands[1] = remainder;
         remainder = tempptr;
         ((ir_expression *)vine_tail)->operands[1] = tempptr;
      }
   }

   return size;
}

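/* One "compression" pass of the DSW algorithm: perform 'count' left
 * rotations down the right spine of the tree hanging off 'root', pulling
 * every other spine node up one level.  Repeated passes (see vine_to_tree()
 * below) turn the vine produced by tree_to_vine() into a balanced tree.
 */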
static void
compression(ir_expression *root, unsigned count)
{
   ir_expression *scanner = root;

   for (unsigned i = 0; i < count; i++) {
      ir_expression *child = (ir_expression *)scanner->operands[1];
      scanner->operands[1] = child->operands[1];
      scanner = (ir_expression *)scanner->operands[1];
      child->operands[1] = scanner->operands[0];
      scanner->operands[0] = child;
   }
}

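/* Rebuild a balanced tree from the vine of 'size' nodes hanging off
 * root->operands[1], where 'size' is the count returned by tree_to_vine().
 * Each compression pass roughly halves the length of the right spine.
 */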
static void
vine_to_tree(ir_expression *root, unsigned size)
{
   int n = size - 1;
   for (int m = n / 2; m > 0; m = n / 2) {
      compression(root, m);
      n -= m + 1;
   }
}

namespace {

class ir_rebalance_visitor : public ir_rvalue_enter_visitor {
public:
   ir_rebalance_visitor()
   {
      progress = false;
   }

   void handle_rvalue(ir_rvalue **rvalue);

   bool progress;
};

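/* State accumulated by is_reduction() while checking whether an expression
 * tree is a candidate for rebalancing.
 */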
struct is_reduction_data {
   ir_expression_operation operation;
   const glsl_type *type;
   unsigned num_expr;
   bool is_reduction;
   bool contains_constant;
};

} /* anonymous namespace */

static bool
is_reduction_operation(ir_expression_operation operation)
{
   switch (operation) {
   case ir_binop_add:
   case ir_binop_mul:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
   case ir_binop_logic_and:
   case ir_binop_logic_xor:
   case ir_binop_logic_or:
   case ir_binop_min:
   case ir_binop_max:
      return true;
   default:
      return false;
   }
}

/* Note that this function does not attempt to recognize that reduction trees
 * are already balanced.
 *
 * We return false from this function for a number of reasons other than an
 * expression tree not being a mathematical reduction. Namely,
 *
 *    - if the tree contains multiple constants that we may be able to combine.
 *    - if the tree contains matrices:
 *       - they might contain vec4's with many constant components that we can
 *         simplify after splitting.
 *       - applying the matrix chain ordering optimization is more than just
 *         balancing an expression tree.
 *    - if the tree contains operations on multiple types.
 *    - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
 *      would trick the visiting pass.
 */
static void
is_reduction(ir_instruction *ir, void *data)
{
   struct is_reduction_data *ird = (struct is_reduction_data *)data;
   if (!ird->is_reduction)
      return;

   /* We don't want to balance a tree that contains multiple constants, since
    * we'll be able to constant fold them if they're not in separate subtrees.
    */
   if (ir->as_constant()) {
      if (ird->contains_constant) {
         ird->is_reduction = false;
      }
      ird->contains_constant = true;
      return;
   }

   /* Array/record dereferences have subtrees that are not part of the expr
    * tree we're balancing. Skip trees containing them.
    */
   if (ir->ir_type == ir_type_dereference_array ||
       ir->ir_type == ir_type_dereference_record) {
      ird->is_reduction = false;
      return;
   }

   ir_expression *expr = ir->as_expression();
   if (!expr)
      return;

   /* Non-constant matrices might still contain constant vec4 that we can
    * constant fold once split up. Handling matrices will need some more
    * work.
    */
   if (expr->type->is_matrix() ||
       expr->operands[0]->type->is_matrix() ||
       (expr->operands[1] && expr->operands[1]->type->is_matrix())) {
      ird->is_reduction = false;
      return;
   }

   if (ird->type != NULL && ird->type != expr->type) {
      ird->is_reduction = false;
      return;
   }
   ird->type = expr->type;

   ird->num_expr++;
   if (is_reduction_operation(expr->operation)) {
      if (ird->operation != 0 && ird->operation != expr->operation)
         ird->is_reduction = false;
      ird->operation = expr->operation;
   } else {
      ird->is_reduction = false;
   }
}

static ir_rvalue *
handle_expression(ir_expression *expr)
{
   struct is_reduction_data ird;
   ird.operation = (ir_expression_operation)0;
   ird.type = NULL;
   ird.num_expr = 0;
   ird.is_reduction = true;
   ird.contains_constant = false;

   visit_tree(expr, is_reduction, (void *)&ird);

   if (ird.is_reduction && ird.num_expr > 2) {
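      /* Link a dummy "0.0 + expr" node on the stack in front of the real
       * root so that the DSW routines can treat it uniformly as a right
       * child; the rebalanced tree is read back out of pseudo_root's
       * second operand.
       */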
      ir_constant z = ir_constant(0.0f);
      ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);

      unsigned size = tree_to_vine(&pseudo_root);
      vine_to_tree(&pseudo_root, size);

      expr = (ir_expression *)pseudo_root.operands[1];
   }
   return expr;
}

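/* Called on each node of a rebalanced tree.  The rotations can pair an
 * expression node with operands of different vector widths than it
 * originally had, so recompute each node's type from its new operands.
 */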
static void
update_types(ir_instruction *ir, void *)
{
   ir_expression *expr = ir->as_expression();
   if (!expr)
      return;

   const glsl_type *const new_type =
      glsl_type::get_instance(expr->type->base_type,
                              MAX2(expr->operands[0]->type->vector_elements,
                                   expr->operands[1]->type->vector_elements),
                              1);
   assert(new_type != glsl_type::error_type);
   expr->type = new_type;
}

void
ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_expression *expr = (*rvalue)->as_expression();
   if (!expr || !is_reduction_operation(expr->operation))
      return;

   ir_rvalue *new_rvalue = handle_expression(expr);

   /* If we failed to rebalance the tree (e.g., because it wasn't a reduction,
    * or some other set of cases) new_rvalue will point to the same root as
    * before.
    *
    * Similarly, if the tree rooted at *rvalue was a reduction and was already
    * balanced, the algorithm will rearrange the tree but will ultimately
    * return an identical tree, so this check will handle that as well and
    * will not set progress = true.
    */
   if (new_rvalue == *rvalue)
      return;

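   /* Rebalancing can leave expression nodes with stale types; fix them up
    * before handing the tree back.
    */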
   visit_tree(new_rvalue, NULL, NULL, update_types);

   *rvalue = new_rvalue;
   this->progress = true;
}

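/* Entry point: rebalance every reduction expression tree found in the
 * instruction list.  Returns true if any tree was changed.
 */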
bool
do_rebalance_tree(exec_list *instructions)
{
   ir_rebalance_visitor v;

   v.run(instructions);

   return v.progress;
}