nir: Move nir_cursor to nir.h.
[mesa.git] / src / glsl / opt_cse.cpp
1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file opt_cse.cpp
26 *
27 * constant subexpression elimination at the GLSL IR level.
28 *
29 * Compare to brw_fs_cse.cpp for a more complete CSE implementation. This one
30 * is generic and handles texture operations, but it's rather simple currently
31 * and doesn't support modification of variables in the available expressions
32 * list, so it can't do variables other than uniforms or shader inputs.
33 */
34
35 #include "ir.h"
36 #include "ir_visitor.h"
37 #include "ir_rvalue_visitor.h"
38 #include "ir_basic_block.h"
39 #include "ir_optimization.h"
40 #include "ir_builder.h"
41 #include "glsl_types.h"
42
43 using namespace ir_builder;
44
45 static bool debug = false;
46
47 namespace {
48
49 /**
50 * This is the record of an available expression for common subexpression
51 * elimination.
52 */
53 class ae_entry : public exec_node
54 {
55 public:
56 ae_entry(ir_instruction *base_ir, ir_rvalue **val)
57 : val(val), base_ir(base_ir)
58 {
59 assert(val);
60 assert(*val);
61 assert(base_ir);
62
63 var = NULL;
64 }
65
66 void init(ir_instruction *base_ir, ir_rvalue **val)
67 {
68 this->val = val;
69 this->base_ir = base_ir;
70 this->var = NULL;
71
72 assert(val);
73 assert(*val);
74 assert(base_ir);
75 }
76
77 /**
78 * The pointer to the expression that we might be able to reuse
79 *
80 * Note the double pointer -- this is the place in the base_ir expression
81 * tree that we would rewrite to move the expression out to a new variable
82 * assignment.
83 */
84 ir_rvalue **val;
85
86 /**
87 * Root instruction in the basic block where the expression appeared.
88 *
89 * This is used so that we can insert the new variable declaration into the
90 * instruction stream (since *val is just somewhere in base_ir's expression
91 * tree).
92 */
93 ir_instruction *base_ir;
94
95 /**
96 * The variable that the expression has been stored in, if it's been CSEd
97 * once already.
98 */
99 ir_variable *var;
100 };
101
102 class cse_visitor : public ir_rvalue_visitor {
103 public:
104 cse_visitor(exec_list *validate_instructions)
105 : validate_instructions(validate_instructions)
106 {
107 progress = false;
108 mem_ctx = ralloc_context(NULL);
109 this->ae = new(mem_ctx) exec_list;
110 }
111 ~cse_visitor()
112 {
113 ralloc_free(mem_ctx);
114 }
115
116 virtual ir_visitor_status visit_enter(ir_function_signature *ir);
117 virtual ir_visitor_status visit_enter(ir_loop *ir);
118 virtual ir_visitor_status visit_enter(ir_if *ir);
119 virtual ir_visitor_status visit_enter(ir_call *ir);
120 virtual void handle_rvalue(ir_rvalue **rvalue);
121
122 bool progress;
123
124 private:
125 void *mem_ctx;
126
127 ir_rvalue *try_cse(ir_rvalue *rvalue);
128 void add_to_ae(ir_rvalue **rvalue);
129
130 /**
131 * Move all nodes from the ae list to the free list
132 */
133 void empty_ae_list();
134
135 /**
136 * Get and initialize a new ae_entry
137 *
138 * This will either come from the free list or be freshly allocated.
139 */
140 ae_entry *get_ae_entry(ir_rvalue **rvalue);
141
142 /** List of ae_entry: The available expressions to reuse */
143 exec_list *ae;
144
145 /**
146 * The whole shader, so that we can validate_ir_tree in debug mode.
147 *
148 * This proved quite useful when trying to get the tree manipulation
149 * right.
150 */
151 exec_list *validate_instructions;
152
153 /**
154 * List of available-for-use ae_entry objects.
155 */
156 exec_list free_ae_entries;
157 };
158
159 /**
160 * Visitor to walk an expression tree to check that all variables referenced
161 * are constants.
162 */
163 class is_cse_candidate_visitor : public ir_hierarchical_visitor
164 {
165 public:
166
167 is_cse_candidate_visitor()
168 : ok(true)
169 {
170 }
171
172 virtual ir_visitor_status visit(ir_dereference_variable *ir);
173
174 bool ok;
175 };
176
177
178 class contains_rvalue_visitor : public ir_rvalue_visitor
179 {
180 public:
181
182 contains_rvalue_visitor(ir_rvalue *val)
183 : val(val)
184 {
185 found = false;
186 }
187
188 virtual void handle_rvalue(ir_rvalue **rvalue);
189
190 bool found;
191
192 private:
193 ir_rvalue *val;
194 };
195
196 } /* unnamed namespace */
197
198 static void
199 dump_ae(exec_list *ae)
200 {
201 int i = 0;
202
203 printf("CSE: AE contents:\n");
204 foreach_in_list(ae_entry, entry, ae) {
205 printf("CSE: AE %2d (%p): ", i, entry);
206 (*entry->val)->print();
207 printf("\n");
208
209 if (entry->var)
210 printf("CSE: in var %p:\n", entry->var);
211
212 i++;
213 }
214 }
215
216 ir_visitor_status
217 is_cse_candidate_visitor::visit(ir_dereference_variable *ir)
218 {
219 /* Currently, since we don't handle kills of the ae based on variables
220 * getting assigned, we can only handle constant variables.
221 */
222 if (ir->var->data.read_only) {
223 return visit_continue;
224 } else {
225 if (debug)
226 printf("CSE: non-candidate: var %s is not read only\n", ir->var->name);
227 ok = false;
228 return visit_stop;
229 }
230 }
231
232 void
233 contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)
234 {
235 if (*rvalue == val)
236 found = true;
237 }
238
239 static bool
240 contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)
241 {
242 contains_rvalue_visitor v(needle);
243 haystack->accept(&v);
244 return v.found;
245 }
246
247 static bool
248 is_cse_candidate(ir_rvalue *ir)
249 {
250 /* Our temporary variable assignment generation isn't ready to handle
251 * anything bigger than a vector.
252 */
253 if (!ir->type->is_vector() && !ir->type->is_scalar()) {
254 if (debug)
255 printf("CSE: non-candidate: not a vector/scalar\n");
256 return false;
257 }
258
259 /* Only handle expressions and textures currently. We may want to extend
260 * to variable-index array dereferences at some point.
261 */
262 switch (ir->ir_type) {
263 case ir_type_expression:
264 case ir_type_texture:
265 break;
266 default:
267 if (debug)
268 printf("CSE: non-candidate: not an expression/texture\n");
269 return false;
270 }
271
272 is_cse_candidate_visitor v;
273
274 ir->accept(&v);
275
276 return v.ok;
277 }
278
279 /**
280 * Tries to find and return a reference to a previous computation of a given
281 * expression.
282 *
283 * Walk the list of available expressions checking if any of them match the
284 * rvalue, and if so, move the previous copy of the expression to a temporary
285 * and return a reference of the temporary.
286 */
287 ir_rvalue *
288 cse_visitor::try_cse(ir_rvalue *rvalue)
289 {
290 foreach_in_list(ae_entry, entry, ae) {
291 if (debug) {
292 printf("Comparing to AE %p: ", entry);
293 (*entry->val)->print();
294 printf("\n");
295 }
296
297 if (!rvalue->equals(*entry->val))
298 continue;
299
300 if (debug) {
301 printf("CSE: Replacing: ");
302 (*entry->val)->print();
303 printf("\n");
304 printf("CSE: with: ");
305 rvalue->print();
306 printf("\n");
307 }
308
309 if (!entry->var) {
310 ir_instruction *base_ir = entry->base_ir;
311
312 ir_variable *var = new(rvalue) ir_variable(rvalue->type,
313 "cse",
314 ir_var_temporary);
315
316 /* Write the previous expression result into a new variable. */
317 base_ir->insert_before(var);
318 ir_assignment *assignment = assign(var, *entry->val);
319 base_ir->insert_before(assignment);
320
321 /* Replace the expression in the original tree with a deref of the
322 * variable, but keep tracking the expression for further reuse.
323 */
324 *entry->val = new(rvalue) ir_dereference_variable(var);
325 entry->val = &assignment->rhs;
326
327 entry->var = var;
328
329 /* Update the base_irs in the AE list. We have to be sure that
330 * they're correct -- expressions from our base_ir that weren't moved
331 * need to stay in this base_ir (so that later consumption of them
332 * puts new variables between our new variable and our base_ir), but
333 * expressions from our base_ir that we *did* move need base_ir
334 * updated so that any further elimination from inside gets its new
335 * assignments put before our new assignment.
336 */
337 foreach_in_list(ae_entry, fixup_entry, ae) {
338 if (contains_rvalue(assignment->rhs, *fixup_entry->val))
339 fixup_entry->base_ir = assignment;
340 }
341
342 if (debug)
343 dump_ae(ae);
344 }
345
346 /* Replace the expression in our current tree with the variable. */
347 return new(rvalue) ir_dereference_variable(entry->var);
348 }
349
350 return NULL;
351 }
352
353 void
354 cse_visitor::empty_ae_list()
355 {
356 free_ae_entries.append_list(ae);
357 }
358
359 ae_entry *
360 cse_visitor::get_ae_entry(ir_rvalue **rvalue)
361 {
362 ae_entry *entry = (ae_entry *) free_ae_entries.pop_head();
363 if (entry) {
364 entry->init(base_ir, rvalue);
365 } else {
366 entry = new(mem_ctx) ae_entry(base_ir, rvalue);
367 }
368
369 return entry;
370 }
371
372 /** Add the rvalue to the list of available expressions for CSE. */
373 void
374 cse_visitor::add_to_ae(ir_rvalue **rvalue)
375 {
376 if (debug) {
377 printf("CSE: Add to AE: ");
378 (*rvalue)->print();
379 printf("\n");
380 }
381
382 ae->push_tail(get_ae_entry(rvalue));
383
384 if (debug)
385 dump_ae(ae);
386 }
387
388 void
389 cse_visitor::handle_rvalue(ir_rvalue **rvalue)
390 {
391 if (!*rvalue)
392 return;
393
394 if (debug) {
395 printf("CSE: handle_rvalue ");
396 (*rvalue)->print();
397 printf("\n");
398 }
399
400 if (!is_cse_candidate(*rvalue))
401 return;
402
403 ir_rvalue *new_rvalue = try_cse(*rvalue);
404 if (new_rvalue) {
405 *rvalue = new_rvalue;
406 progress = true;
407
408 if (debug)
409 validate_ir_tree(validate_instructions);
410 } else {
411 add_to_ae(rvalue);
412 }
413 }
414
415 ir_visitor_status
416 cse_visitor::visit_enter(ir_if *ir)
417 {
418 handle_rvalue(&ir->condition);
419
420 empty_ae_list();
421 visit_list_elements(this, &ir->then_instructions);
422
423 empty_ae_list();
424 visit_list_elements(this, &ir->else_instructions);
425
426 empty_ae_list();
427 return visit_continue_with_parent;
428 }
429
430 ir_visitor_status
431 cse_visitor::visit_enter(ir_function_signature *ir)
432 {
433 empty_ae_list();
434 visit_list_elements(this, &ir->body);
435
436 empty_ae_list();
437 return visit_continue_with_parent;
438 }
439
440 ir_visitor_status
441 cse_visitor::visit_enter(ir_loop *ir)
442 {
443 empty_ae_list();
444 visit_list_elements(this, &ir->body_instructions);
445
446 empty_ae_list();
447 return visit_continue_with_parent;
448 }
449
450 ir_visitor_status
451 cse_visitor::visit_enter(ir_call *)
452 {
453 /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a
454 * pointer to the (ir_rvalue *) on the stack. Since we save those pointers
455 * in the AE list, we can't let handle_rvalue get called.
456 */
457 return visit_continue_with_parent;
458 }
459
460 /**
461 * Does a (uniform-value) constant subexpression elimination pass on the code
462 * present in the instruction stream.
463 */
464 bool
465 do_cse(exec_list *instructions)
466 {
467 cse_visitor v(instructions);
468
469 visit_list_elements(&v, instructions);
470
471 return v.progress;
472 }