2 * Copyright © 2013 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
27 * constant subexpression elimination at the GLSL IR level.
29 * Compare to brw_fs_cse.cpp for a more complete CSE implementation. This one
30 * is generic and handles texture operations, but it's rather simple currently
31 * and doesn't support modification of variables in the available expressions
32 * list, so it can't do variables other than uniforms or shader inputs.
36 #include "ir_visitor.h"
37 #include "ir_rvalue_visitor.h"
38 #include "ir_basic_block.h"
39 #include "ir_optimization.h"
40 #include "ir_builder.h"
41 #include "glsl_types.h"
43 using namespace ir_builder
;
45 static bool debug
= false;
50 * This is the record of an available expression for common subexpression
53 class ae_entry
: public exec_node
56 ae_entry(ir_instruction
*base_ir
, ir_rvalue
**val
)
57 : val(val
), base_ir(base_ir
)
66 void init(ir_instruction
*base_ir
, ir_rvalue
**val
)
69 this->base_ir
= base_ir
;
78 * The pointer to the expression that we might be able to reuse
80 * Note the double pointer -- this is the place in the base_ir expression
81 * tree that we would rewrite to move the expression out to a new variable
87 * Root instruction in the basic block where the expression appeared.
89 * This is used so that we can insert the new variable declaration into the
90 * instruction stream (since *val is just somewhere in base_ir's expression
93 ir_instruction
*base_ir
;
96 * The variable that the expression has been stored in, if it's been CSEd
102 class cse_visitor
: public ir_rvalue_visitor
{
104 cse_visitor(exec_list
*validate_instructions
)
105 : validate_instructions(validate_instructions
)
108 mem_ctx
= ralloc_context(NULL
);
109 this->ae
= new(mem_ctx
) exec_list
;
113 ralloc_free(mem_ctx
);
116 virtual ir_visitor_status
visit_enter(ir_function_signature
*ir
);
117 virtual ir_visitor_status
visit_enter(ir_loop
*ir
);
118 virtual ir_visitor_status
visit_enter(ir_if
*ir
);
119 virtual ir_visitor_status
visit_enter(ir_call
*ir
);
120 virtual void handle_rvalue(ir_rvalue
**rvalue
);
127 ir_rvalue
*try_cse(ir_rvalue
*rvalue
);
128 void add_to_ae(ir_rvalue
**rvalue
);
131 * Move all nodes from the ae list to the free list
133 void empty_ae_list();
136 * Get and initialize a new ae_entry
138 * This will either come from the free list or be freshly allocated.
140 ae_entry
*get_ae_entry(ir_rvalue
**rvalue
);
142 /** List of ae_entry: The available expressions to reuse */
146 * The whole shader, so that we can validate_ir_tree in debug mode.
148 * This proved quite useful when trying to get the tree manipulation
151 exec_list
*validate_instructions
;
154 * List of available-for-use ae_entry objects.
156 exec_list free_ae_entries
;
160 * Visitor to walk an expression tree to check that all variables referenced
163 class is_cse_candidate_visitor
: public ir_hierarchical_visitor
167 is_cse_candidate_visitor()
172 virtual ir_visitor_status
visit(ir_dereference_variable
*ir
);
178 class contains_rvalue_visitor
: public ir_rvalue_visitor
182 contains_rvalue_visitor(ir_rvalue
*val
)
188 virtual void handle_rvalue(ir_rvalue
**rvalue
);
196 } /* unnamed namespace */
199 dump_ae(exec_list
*ae
)
203 printf("CSE: AE contents:\n");
204 foreach_in_list(ae_entry
, entry
, ae
) {
205 printf("CSE: AE %2d (%p): ", i
, entry
);
206 (*entry
->val
)->print();
210 printf("CSE: in var %p:\n", entry
->var
);
217 is_cse_candidate_visitor::visit(ir_dereference_variable
*ir
)
219 /* Currently, since we don't handle kills of the ae based on variables
220 * getting assigned, we can only handle constant variables.
222 if (ir
->var
->data
.read_only
) {
223 return visit_continue
;
226 printf("CSE: non-candidate: var %s is not read only\n", ir
->var
->name
);
233 contains_rvalue_visitor::handle_rvalue(ir_rvalue
**rvalue
)
240 contains_rvalue(ir_rvalue
*haystack
, ir_rvalue
*needle
)
242 contains_rvalue_visitor
v(needle
);
243 haystack
->accept(&v
);
248 is_cse_candidate(ir_rvalue
*ir
)
250 /* Our temporary variable assignment generation isn't ready to handle
251 * anything bigger than a vector.
253 if (!ir
->type
->is_vector() && !ir
->type
->is_scalar()) {
255 printf("CSE: non-candidate: not a vector/scalar\n");
259 /* Only handle expressions and textures currently. We may want to extend
260 * to variable-index array dereferences at some point.
262 switch (ir
->ir_type
) {
263 case ir_type_expression
:
264 case ir_type_texture
:
268 printf("CSE: non-candidate: not an expression/texture\n");
272 is_cse_candidate_visitor v
;
280 * Tries to find and return a reference to a previous computation of a given
283 * Walk the list of available expressions checking if any of them match the
284 * rvalue, and if so, move the previous copy of the expression to a temporary
285 * and return a reference of the temporary.
288 cse_visitor::try_cse(ir_rvalue
*rvalue
)
290 foreach_in_list(ae_entry
, entry
, ae
) {
292 printf("Comparing to AE %p: ", entry
);
293 (*entry
->val
)->print();
297 if (!rvalue
->equals(*entry
->val
))
301 printf("CSE: Replacing: ");
302 (*entry
->val
)->print();
304 printf("CSE: with: ");
310 ir_instruction
*base_ir
= entry
->base_ir
;
312 ir_variable
*var
= new(rvalue
) ir_variable(rvalue
->type
,
316 /* Write the previous expression result into a new variable. */
317 base_ir
->insert_before(var
);
318 ir_assignment
*assignment
= assign(var
, *entry
->val
);
319 base_ir
->insert_before(assignment
);
321 /* Replace the expression in the original tree with a deref of the
322 * variable, but keep tracking the expression for further reuse.
324 *entry
->val
= new(rvalue
) ir_dereference_variable(var
);
325 entry
->val
= &assignment
->rhs
;
329 /* Update the base_irs in the AE list. We have to be sure that
330 * they're correct -- expressions from our base_ir that weren't moved
331 * need to stay in this base_ir (so that later consumption of them
332 * puts new variables between our new variable and our base_ir), but
333 * expressions from our base_ir that we *did* move need base_ir
334 * updated so that any further elimination from inside gets its new
335 * assignments put before our new assignment.
337 foreach_in_list(ae_entry
, fixup_entry
, ae
) {
338 if (contains_rvalue(assignment
->rhs
, *fixup_entry
->val
))
339 fixup_entry
->base_ir
= assignment
;
346 /* Replace the expression in our current tree with the variable. */
347 return new(rvalue
) ir_dereference_variable(entry
->var
);
354 cse_visitor::empty_ae_list()
356 free_ae_entries
.append_list(ae
);
360 cse_visitor::get_ae_entry(ir_rvalue
**rvalue
)
362 ae_entry
*entry
= (ae_entry
*) free_ae_entries
.pop_head();
364 entry
->init(base_ir
, rvalue
);
366 entry
= new(mem_ctx
) ae_entry(base_ir
, rvalue
);
372 /** Add the rvalue to the list of available expressions for CSE. */
374 cse_visitor::add_to_ae(ir_rvalue
**rvalue
)
377 printf("CSE: Add to AE: ");
382 ae
->push_tail(get_ae_entry(rvalue
));
389 cse_visitor::handle_rvalue(ir_rvalue
**rvalue
)
395 printf("CSE: handle_rvalue ");
400 if (!is_cse_candidate(*rvalue
))
403 ir_rvalue
*new_rvalue
= try_cse(*rvalue
);
405 *rvalue
= new_rvalue
;
409 validate_ir_tree(validate_instructions
);
416 cse_visitor::visit_enter(ir_if
*ir
)
418 handle_rvalue(&ir
->condition
);
421 visit_list_elements(this, &ir
->then_instructions
);
424 visit_list_elements(this, &ir
->else_instructions
);
427 return visit_continue_with_parent
;
431 cse_visitor::visit_enter(ir_function_signature
*ir
)
434 visit_list_elements(this, &ir
->body
);
437 return visit_continue_with_parent
;
441 cse_visitor::visit_enter(ir_loop
*ir
)
444 visit_list_elements(this, &ir
->body_instructions
);
447 return visit_continue_with_parent
;
451 cse_visitor::visit_enter(ir_call
*)
453 /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a
454 * pointer to the (ir_rvalue *) on the stack. Since we save those pointers
455 * in the AE list, we can't let handle_rvalue get called.
457 return visit_continue_with_parent
;
461 * Does a (uniform-value) constant subexpression elimination pass on the code
462 * present in the instruction stream.
465 do_cse(exec_list
*instructions
)
467 cse_visitor
v(instructions
);
469 visit_list_elements(&v
, instructions
);