2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * constant of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, constant, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above constantright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR CONSTANTRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_constant_propagation.cpp
27 * Tracks assignments of constants to channels of variables, and
28 * usage of those constant channels with direct usage of the constants.
30 * This can lead to constant folding and algebraic optimizations in
31 * those later expressions, while causing no increase in instruction
32 * count (due to constants being generally free to load from a
33 * constant push buffer or as instruction immediate values) and
34 * possibly reducing register pressure.
38 #include "ir_visitor.h"
39 #include "ir_rvalue_visitor.h"
40 #include "ir_basic_block.h"
41 #include "ir_optimization.h"
42 #include "compiler/glsl_types.h"
43 #include "util/hash_table.h"
47 class acp_entry
: public exec_node
50 /* override operator new from exec_node */
51 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry
)
53 acp_entry(ir_variable
*var
, unsigned write_mask
, ir_constant
*constant
)
58 this->write_mask
= write_mask
;
59 this->constant
= constant
;
60 this->initial_values
= write_mask
;
63 acp_entry(const acp_entry
*src
)
66 this->write_mask
= src
->write_mask
;
67 this->constant
= src
->constant
;
68 this->initial_values
= src
->initial_values
;
72 ir_constant
*constant
;
75 /** Mask of values initially available in the constant. */
76 unsigned initial_values
;
80 class ir_constant_propagation_visitor
: public ir_rvalue_visitor
{
82 ir_constant_propagation_visitor()
86 mem_ctx
= ralloc_context(0);
87 this->lin_ctx
= linear_alloc_parent(this->mem_ctx
, 0);
88 this->acp
= new(mem_ctx
) exec_list
;
89 this->kills
= _mesa_hash_table_create(mem_ctx
, _mesa_hash_pointer
,
90 _mesa_key_pointer_equal
);
92 ~ir_constant_propagation_visitor()
97 virtual ir_visitor_status
visit_enter(class ir_loop
*);
98 virtual ir_visitor_status
visit_enter(class ir_function_signature
*);
99 virtual ir_visitor_status
visit_enter(class ir_function
*);
100 virtual ir_visitor_status
visit_leave(class ir_assignment
*);
101 virtual ir_visitor_status
visit_enter(class ir_call
*);
102 virtual ir_visitor_status
visit_enter(class ir_if
*);
104 void add_constant(ir_assignment
*ir
);
105 void constant_folding(ir_rvalue
**rvalue
);
106 void constant_propagation(ir_rvalue
**rvalue
);
107 void kill(ir_variable
*ir
, unsigned write_mask
);
108 void handle_if_block(exec_list
*instructions
, hash_table
*kills
, bool *killed_all
);
109 void handle_loop(class ir_loop
*, bool keep_acp
);
110 void handle_rvalue(ir_rvalue
**rvalue
);
112 /** List of acp_entry: The available constants to propagate */
116 * Hash table of killed entries: maps variables to the mask of killed channels.
130 ir_constant_propagation_visitor::constant_folding(ir_rvalue
**rvalue
)
132 if (this->in_assignee
|| *rvalue
== NULL
)
135 if (ir_constant_fold(rvalue
))
136 this->progress
= true;
138 ir_dereference_variable
*var_ref
= (*rvalue
)->as_dereference_variable();
139 if (var_ref
&& !var_ref
->type
->is_array()) {
140 ir_constant
*constant
=
141 var_ref
->constant_expression_value(ralloc_parent(var_ref
));
144 this->progress
= true;
150 ir_constant_propagation_visitor::constant_propagation(ir_rvalue
**rvalue
) {
152 if (this->in_assignee
|| !*rvalue
)
155 const glsl_type
*type
= (*rvalue
)->type
;
156 if (!type
->is_scalar() && !type
->is_vector())
159 ir_swizzle
*swiz
= NULL
;
160 ir_dereference_variable
*deref
= (*rvalue
)->as_dereference_variable();
162 swiz
= (*rvalue
)->as_swizzle();
166 deref
= swiz
->val
->as_dereference_variable();
171 ir_constant_data data
;
172 memset(&data
, 0, sizeof(data
));
174 for (unsigned int i
= 0; i
< type
->components(); i
++) {
176 acp_entry
*found
= NULL
;
180 case 0: channel
= swiz
->mask
.x
; break;
181 case 1: channel
= swiz
->mask
.y
; break;
182 case 2: channel
= swiz
->mask
.z
; break;
183 case 3: channel
= swiz
->mask
.w
; break;
184 default: assert(!"shouldn't be reached"); channel
= 0; break;
190 foreach_in_list(acp_entry
, entry
, this->acp
) {
191 if (entry
->var
== deref
->var
&& entry
->write_mask
& (1 << channel
)) {
201 for (int j
= 0; j
< 4; j
++) {
204 if (found
->initial_values
& (1 << j
))
208 switch (type
->base_type
) {
209 case GLSL_TYPE_FLOAT
:
210 data
.f
[i
] = found
->constant
->value
.f
[rhs_channel
];
212 case GLSL_TYPE_DOUBLE
:
213 data
.d
[i
] = found
->constant
->value
.d
[rhs_channel
];
216 data
.i
[i
] = found
->constant
->value
.i
[rhs_channel
];
219 data
.u
[i
] = found
->constant
->value
.u
[rhs_channel
];
222 data
.b
[i
] = found
->constant
->value
.b
[rhs_channel
];
224 case GLSL_TYPE_UINT64
:
225 data
.u64
[i
] = found
->constant
->value
.u64
[rhs_channel
];
227 case GLSL_TYPE_INT64
:
228 data
.i64
[i
] = found
->constant
->value
.i64
[rhs_channel
];
231 assert(!"not reached");
236 *rvalue
= new(ralloc_parent(deref
)) ir_constant(type
, &data
);
237 this->progress
= true;
241 ir_constant_propagation_visitor::handle_rvalue(ir_rvalue
**rvalue
)
243 constant_propagation(rvalue
);
244 constant_folding(rvalue
);
248 ir_constant_propagation_visitor::visit_enter(ir_function_signature
*ir
)
250 /* Treat entry into a function signature as a completely separate
251 * block. Any instructions at global scope will be shuffled into
252 * main() at link time, so they're irrelevant to us.
254 exec_list
*orig_acp
= this->acp
;
255 hash_table
*orig_kills
= this->kills
;
256 bool orig_killed_all
= this->killed_all
;
258 this->acp
= new(mem_ctx
) exec_list
;
259 this->kills
= _mesa_hash_table_create(mem_ctx
, _mesa_hash_pointer
,
260 _mesa_key_pointer_equal
);
261 this->killed_all
= false;
263 visit_list_elements(this, &ir
->body
);
265 this->kills
= orig_kills
;
266 this->acp
= orig_acp
;
267 this->killed_all
= orig_killed_all
;
269 return visit_continue_with_parent
;
273 ir_constant_propagation_visitor::visit_leave(ir_assignment
*ir
)
275 constant_folding(&ir
->rhs
);
277 if (this->in_assignee
)
278 return visit_continue
;
280 unsigned kill_mask
= ir
->write_mask
;
281 if (ir
->lhs
->as_dereference_array()) {
282 /* The LHS of the assignment uses an array indexing operator (e.g. v[i]
283 * = ...;). Since we only try to constant propagate vectors and
284 * scalars, this means that either (a) array indexing is being used to
285 * select a vector component, or (b) the variable in question is neither
286 * a scalar or a vector, so we don't care about it. In the former case,
287 * we want to kill the whole vector, since in general we can't predict
288 * which vector component will be selected by array indexing. In the
289 * latter case, it doesn't matter what we do, so go ahead and kill the
290 * whole variable anyway.
292 * Note that if the array index is constant (e.g. v[2] = ...;), we could
293 * in principle be smarter, but we don't need to, because a future
294 * optimization pass will convert it to a simple assignment with the
299 kill(ir
->lhs
->variable_referenced(), kill_mask
);
303 return visit_continue
;
307 ir_constant_propagation_visitor::visit_enter(ir_function
*ir
)
310 return visit_continue
;
314 ir_constant_propagation_visitor::visit_enter(ir_call
*ir
)
316 /* Do constant propagation on call parameters, but skip any out params */
317 foreach_two_lists(formal_node
, &ir
->callee
->parameters
,
318 actual_node
, &ir
->actual_parameters
) {
319 ir_variable
*sig_param
= (ir_variable
*) formal_node
;
320 ir_rvalue
*param
= (ir_rvalue
*) actual_node
;
321 if (sig_param
->data
.mode
!= ir_var_function_out
322 && sig_param
->data
.mode
!= ir_var_function_inout
) {
323 ir_rvalue
*new_param
= param
;
324 handle_rvalue(&new_param
);
325 if (new_param
!= param
)
326 param
->replace_with(new_param
);
332 /* Since we're unlinked, we don't (necssarily) know the side effects of
333 * this call. So kill all copies.
336 this->killed_all
= true;
338 return visit_continue_with_parent
;
342 ir_constant_propagation_visitor::handle_if_block(exec_list
*instructions
, hash_table
*kills
, bool *killed_all
)
344 exec_list
*orig_acp
= this->acp
;
345 hash_table
*orig_kills
= this->kills
;
346 bool orig_killed_all
= this->killed_all
;
348 this->acp
= new(mem_ctx
) exec_list
;
350 this->killed_all
= false;
352 /* Populate the initial acp with a constant of the original */
353 foreach_in_list(acp_entry
, a
, orig_acp
) {
354 this->acp
->push_tail(new(this->lin_ctx
) acp_entry(a
));
357 visit_list_elements(this, instructions
);
359 *killed_all
= this->killed_all
;
360 this->kills
= orig_kills
;
361 this->acp
= orig_acp
;
362 this->killed_all
= orig_killed_all
;
366 ir_constant_propagation_visitor::visit_enter(ir_if
*ir
)
368 ir
->condition
->accept(this);
369 handle_rvalue(&ir
->condition
);
371 hash_table
*new_kills
= _mesa_hash_table_create(mem_ctx
, _mesa_hash_pointer
,
372 _mesa_key_pointer_equal
);
373 bool then_killed_all
= false;
374 bool else_killed_all
= false;
376 handle_if_block(&ir
->then_instructions
, new_kills
, &then_killed_all
);
377 handle_if_block(&ir
->else_instructions
, new_kills
, &else_killed_all
);
379 if (then_killed_all
|| else_killed_all
) {
383 hash_table_foreach(new_kills
, htk
)
384 kill((ir_variable
*) htk
->key
, (uintptr_t) htk
->data
);
387 _mesa_hash_table_destroy(new_kills
, NULL
);
389 /* handle_if_block() already descended into the children. */
390 return visit_continue_with_parent
;
394 ir_constant_propagation_visitor::handle_loop(ir_loop
*ir
, bool keep_acp
)
396 exec_list
*orig_acp
= this->acp
;
397 hash_table
*orig_kills
= this->kills
;
398 bool orig_killed_all
= this->killed_all
;
400 this->acp
= new(mem_ctx
) exec_list
;
401 this->kills
= _mesa_hash_table_create(mem_ctx
, _mesa_hash_pointer
,
402 _mesa_key_pointer_equal
);
403 this->killed_all
= false;
406 foreach_in_list(acp_entry
, a
, orig_acp
) {
407 this->acp
->push_tail(new(this->lin_ctx
) acp_entry(a
));
411 visit_list_elements(this, &ir
->body_instructions
);
413 if (this->killed_all
) {
414 orig_acp
->make_empty();
417 hash_table
*new_kills
= this->kills
;
418 this->kills
= orig_kills
;
419 this->acp
= orig_acp
;
420 this->killed_all
= this->killed_all
|| orig_killed_all
;
422 hash_table_foreach(new_kills
, htk
) {
423 kill((ir_variable
*) htk
->key
, (uintptr_t) htk
->data
);
428 ir_constant_propagation_visitor::visit_enter(ir_loop
*ir
)
430 /* Make a conservative first pass over the loop with an empty ACP set.
431 * This also removes any killed entries from the original ACP set.
433 handle_loop(ir
, false);
435 /* Then, run it again with the real ACP set, minus any killed entries.
436 * This takes care of propagating values from before the loop into it.
438 handle_loop(ir
, true);
440 /* already descended into the children. */
441 return visit_continue_with_parent
;
445 ir_constant_propagation_visitor::kill(ir_variable
*var
, unsigned write_mask
)
449 /* We don't track non-vectors. */
450 if (!var
->type
->is_vector() && !var
->type
->is_scalar())
453 /* Remove any entries currently in the ACP for this kill. */
454 foreach_in_list_safe(acp_entry
, entry
, this->acp
) {
455 if (entry
->var
== var
) {
456 entry
->write_mask
&= ~write_mask
;
457 if (entry
->write_mask
== 0)
462 /* Add this writemask of the variable to the hash table of killed
463 * variables in this block.
465 hash_entry
*kill_hash_entry
= _mesa_hash_table_search(this->kills
, var
);
466 if (kill_hash_entry
) {
467 uintptr_t new_write_mask
= ((uintptr_t) kill_hash_entry
->data
) | write_mask
;
468 kill_hash_entry
->data
= (void *) new_write_mask
;
471 /* Not already in the hash table. Make new entry. */
472 _mesa_hash_table_insert(this->kills
, var
, (void *) uintptr_t(write_mask
));
476 * Adds an entry to the available constant list if it's a plain assignment
477 * of a variable to a variable.
480 ir_constant_propagation_visitor::add_constant(ir_assignment
*ir
)
490 ir_dereference_variable
*deref
= ir
->lhs
->as_dereference_variable();
491 ir_constant
*constant
= ir
->rhs
->as_constant();
493 if (!deref
|| !constant
)
496 /* Only do constant propagation on vectors. Constant matrices,
497 * arrays, or structures would require more work elsewhere.
499 if (!deref
->var
->type
->is_vector() && !deref
->var
->type
->is_scalar())
502 /* We can't do copy propagation on buffer variables, since the underlying
503 * memory storage is shared across multiple threads we can't be sure that
504 * the variable value isn't modified between this assignment and the next
505 * instruction where its value is read.
507 if (deref
->var
->data
.mode
== ir_var_shader_storage
||
508 deref
->var
->data
.mode
== ir_var_shader_shared
)
511 entry
= new(this->lin_ctx
) acp_entry(deref
->var
, ir
->write_mask
, constant
);
512 this->acp
->push_tail(entry
);
515 } /* unnamed namespace */
518 * Does a constant propagation pass on the code present in the instruction stream.
521 do_constant_propagation(exec_list
*instructions
)
523 ir_constant_propagation_visitor v
;
525 visit_list_elements(&v
, instructions
);