util/hash_set: Rework the API to know about hashing
[mesa.git] / src / glsl / opt_cse.cpp
1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file opt_cse.cpp
26 *
27 * constant subexpression elimination at the GLSL IR level.
28 *
29 * Compare to brw_fs_cse.cpp for a more complete CSE implementation. This one
30 * is generic and handles texture operations, but it's rather simple currently
31 * and doesn't support modification of variables in the available expressions
32 * list, so it can't do variables other than uniforms or shader inputs.
33 */
34
35 #include "ir.h"
36 #include "ir_visitor.h"
37 #include "ir_rvalue_visitor.h"
38 #include "ir_basic_block.h"
39 #include "ir_optimization.h"
40 #include "ir_builder.h"
41 #include "glsl_types.h"
42
43 using namespace ir_builder;
44
45 static bool debug = false;
46
47 namespace {
48
49 /**
50 * This is the record of an available expression for common subexpression
51 * elimination.
52 */
53 class ae_entry : public exec_node
54 {
55 public:
56 ae_entry(ir_instruction *base_ir, ir_rvalue **val)
57 : val(val), base_ir(base_ir)
58 {
59 assert(val);
60 assert(*val);
61 assert(base_ir);
62
63 var = NULL;
64 }
65
66 /**
67 * The pointer to the expression that we might be able to reuse
68 *
69 * Note the double pointer -- this is the place in the base_ir expression
70 * tree that we would rewrite to move the expression out to a new variable
71 * assignment.
72 */
73 ir_rvalue **val;
74
75 /**
76 * Root instruction in the basic block where the expression appeared.
77 *
78 * This is used so that we can insert the new variable declaration into the
79 * instruction stream (since *val is just somewhere in base_ir's expression
80 * tree).
81 */
82 ir_instruction *base_ir;
83
84 /**
85 * The variable that the expression has been stored in, if it's been CSEd
86 * once already.
87 */
88 ir_variable *var;
89 };
90
91 class cse_visitor : public ir_rvalue_visitor {
92 public:
93 cse_visitor(exec_list *validate_instructions)
94 : validate_instructions(validate_instructions)
95 {
96 progress = false;
97 mem_ctx = ralloc_context(NULL);
98 this->ae = new(mem_ctx) exec_list;
99 }
100 ~cse_visitor()
101 {
102 ralloc_free(mem_ctx);
103 }
104
105 virtual ir_visitor_status visit_enter(ir_function_signature *ir);
106 virtual ir_visitor_status visit_enter(ir_loop *ir);
107 virtual ir_visitor_status visit_enter(ir_if *ir);
108 virtual ir_visitor_status visit_enter(ir_call *ir);
109 virtual void handle_rvalue(ir_rvalue **rvalue);
110
111 bool progress;
112
113 private:
114 void *mem_ctx;
115
116 ir_rvalue *try_cse(ir_rvalue *rvalue);
117 void add_to_ae(ir_rvalue **rvalue);
118
119 /** List of ae_entry: The available expressions to reuse */
120 exec_list *ae;
121
122 /**
123 * The whole shader, so that we can validate_ir_tree in debug mode.
124 *
125 * This proved quite useful when trying to get the tree manipulation
126 * right.
127 */
128 exec_list *validate_instructions;
129 };
130
131 /**
132 * Visitor to walk an expression tree to check that all variables referenced
133 * are constants.
134 */
135 class is_cse_candidate_visitor : public ir_hierarchical_visitor
136 {
137 public:
138
139 is_cse_candidate_visitor()
140 : ok(true)
141 {
142 }
143
144 virtual ir_visitor_status visit(ir_dereference_variable *ir);
145
146 bool ok;
147 };
148
149
150 class contains_rvalue_visitor : public ir_rvalue_visitor
151 {
152 public:
153
154 contains_rvalue_visitor(ir_rvalue *val)
155 : val(val)
156 {
157 found = false;
158 }
159
160 virtual void handle_rvalue(ir_rvalue **rvalue);
161
162 bool found;
163
164 private:
165 ir_rvalue *val;
166 };
167
168 } /* unnamed namespace */
169
170 static void
171 dump_ae(exec_list *ae)
172 {
173 int i = 0;
174
175 printf("CSE: AE contents:\n");
176 foreach_in_list(ae_entry, entry, ae) {
177 printf("CSE: AE %2d (%p): ", i, entry);
178 (*entry->val)->print();
179 printf("\n");
180
181 if (entry->var)
182 printf("CSE: in var %p:\n", entry->var);
183
184 i++;
185 }
186 }
187
188 ir_visitor_status
189 is_cse_candidate_visitor::visit(ir_dereference_variable *ir)
190 {
191 /* Currently, since we don't handle kills of the ae based on variables
192 * getting assigned, we can only handle constant variables.
193 */
194 if (ir->var->data.read_only) {
195 return visit_continue;
196 } else {
197 if (debug)
198 printf("CSE: non-candidate: var %s is not read only\n", ir->var->name);
199 ok = false;
200 return visit_stop;
201 }
202 }
203
204 void
205 contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)
206 {
207 if (*rvalue == val)
208 found = true;
209 }
210
211 static bool
212 contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)
213 {
214 contains_rvalue_visitor v(needle);
215 haystack->accept(&v);
216 return v.found;
217 }
218
219 static bool
220 is_cse_candidate(ir_rvalue *ir)
221 {
222 /* Our temporary variable assignment generation isn't ready to handle
223 * anything bigger than a vector.
224 */
225 if (!ir->type->is_vector() && !ir->type->is_scalar()) {
226 if (debug)
227 printf("CSE: non-candidate: not a vector/scalar\n");
228 return false;
229 }
230
231 /* Only handle expressions and textures currently. We may want to extend
232 * to variable-index array dereferences at some point.
233 */
234 switch (ir->ir_type) {
235 case ir_type_expression:
236 case ir_type_texture:
237 break;
238 default:
239 if (debug)
240 printf("CSE: non-candidate: not an expression/texture\n");
241 return false;
242 }
243
244 is_cse_candidate_visitor v;
245
246 ir->accept(&v);
247
248 return v.ok;
249 }
250
251 /**
252 * Tries to find and return a reference to a previous computation of a given
253 * expression.
254 *
255 * Walk the list of available expressions checking if any of them match the
256 * rvalue, and if so, move the previous copy of the expression to a temporary
257 * and return a reference of the temporary.
258 */
259 ir_rvalue *
260 cse_visitor::try_cse(ir_rvalue *rvalue)
261 {
262 foreach_in_list(ae_entry, entry, ae) {
263 if (debug) {
264 printf("Comparing to AE %p: ", entry);
265 (*entry->val)->print();
266 printf("\n");
267 }
268
269 if (!rvalue->equals(*entry->val))
270 continue;
271
272 if (debug) {
273 printf("CSE: Replacing: ");
274 (*entry->val)->print();
275 printf("\n");
276 printf("CSE: with: ");
277 rvalue->print();
278 printf("\n");
279 }
280
281 if (!entry->var) {
282 ir_instruction *base_ir = entry->base_ir;
283
284 ir_variable *var = new(rvalue) ir_variable(rvalue->type,
285 "cse",
286 ir_var_temporary);
287
288 /* Write the previous expression result into a new variable. */
289 base_ir->insert_before(var);
290 ir_assignment *assignment = assign(var, *entry->val);
291 base_ir->insert_before(assignment);
292
293 /* Replace the expression in the original tree with a deref of the
294 * variable, but keep tracking the expression for further reuse.
295 */
296 *entry->val = new(rvalue) ir_dereference_variable(var);
297 entry->val = &assignment->rhs;
298
299 entry->var = var;
300
301 /* Update the base_irs in the AE list. We have to be sure that
302 * they're correct -- expressions from our base_ir that weren't moved
303 * need to stay in this base_ir (so that later consumption of them
304 * puts new variables between our new variable and our base_ir), but
305 * expressions from our base_ir that we *did* move need base_ir
306 * updated so that any further elimination from inside gets its new
307 * assignments put before our new assignment.
308 */
309 foreach_in_list(ae_entry, fixup_entry, ae) {
310 if (contains_rvalue(assignment->rhs, *fixup_entry->val))
311 fixup_entry->base_ir = assignment;
312 }
313
314 if (debug)
315 dump_ae(ae);
316 }
317
318 /* Replace the expression in our current tree with the variable. */
319 return new(rvalue) ir_dereference_variable(entry->var);
320 }
321
322 return NULL;
323 }
324
325 /** Add the rvalue to the list of available expressions for CSE. */
326 void
327 cse_visitor::add_to_ae(ir_rvalue **rvalue)
328 {
329 if (debug) {
330 printf("CSE: Add to AE: ");
331 (*rvalue)->print();
332 printf("\n");
333 }
334
335 ae->push_tail(new(mem_ctx) ae_entry(base_ir, rvalue));
336
337 if (debug)
338 dump_ae(ae);
339 }
340
341 void
342 cse_visitor::handle_rvalue(ir_rvalue **rvalue)
343 {
344 if (!*rvalue)
345 return;
346
347 if (debug) {
348 printf("CSE: handle_rvalue ");
349 (*rvalue)->print();
350 printf("\n");
351 }
352
353 if (!is_cse_candidate(*rvalue))
354 return;
355
356 ir_rvalue *new_rvalue = try_cse(*rvalue);
357 if (new_rvalue) {
358 *rvalue = new_rvalue;
359 progress = true;
360
361 if (debug)
362 validate_ir_tree(validate_instructions);
363 } else {
364 add_to_ae(rvalue);
365 }
366 }
367
368 ir_visitor_status
369 cse_visitor::visit_enter(ir_if *ir)
370 {
371 handle_rvalue(&ir->condition);
372
373 ae->make_empty();
374 visit_list_elements(this, &ir->then_instructions);
375
376 ae->make_empty();
377 visit_list_elements(this, &ir->else_instructions);
378
379 ae->make_empty();
380 return visit_continue_with_parent;
381 }
382
383 ir_visitor_status
384 cse_visitor::visit_enter(ir_function_signature *ir)
385 {
386 ae->make_empty();
387 visit_list_elements(this, &ir->body);
388
389 ae->make_empty();
390 return visit_continue_with_parent;
391 }
392
393 ir_visitor_status
394 cse_visitor::visit_enter(ir_loop *ir)
395 {
396 ae->make_empty();
397 visit_list_elements(this, &ir->body_instructions);
398
399 ae->make_empty();
400 return visit_continue_with_parent;
401 }
402
403 ir_visitor_status
404 cse_visitor::visit_enter(ir_call *)
405 {
406 /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a
407 * pointer to the (ir_rvalue *) on the stack. Since we save those pointers
408 * in the AE list, we can't let handle_rvalue get called.
409 */
410 return visit_continue_with_parent;
411 }
412
413 /**
414 * Does a (uniform-value) constant subexpression elimination pass on the code
415 * present in the instruction stream.
416 */
417 bool
418 do_cse(exec_list *instructions)
419 {
420 cse_visitor v(instructions);
421
422 visit_list_elements(&v, instructions);
423
424 return v.progress;
425 }