2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_copy_propagation_elements.cpp
27 * Replaces usage of recently-copied components of variables with the
28 * previous copy of the variable.
30 * This should reduce the number of MOV instructions in the generated
31 * programs and help triggering other optimizations that live in GLSL
36 #include "ir_rvalue_visitor.h"
37 #include "ir_basic_block.h"
38 #include "ir_optimization.h"
39 #include "compiler/glsl_types.h"
40 #include "util/hash_table.h"
43 static bool debug
= false;
50 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry
)
52 /* If set, rhs_full indicates that this ACP entry represents a
53 * whole-variable copy. The rhs_element[] array will still be filled,
54 * to allow the swizzling from its components in case the variable
55 * was a vector (and to simplify some of the erase() and write_vector()
59 ir_variable
*rhs_full
;
60 ir_variable
*rhs_element
[4];
61 unsigned rhs_channel
[4];
63 /* Set of variables that use the variable associated with this acp_entry as
64 * RHS. This has the "reverse references" of rhs_full/rhs_element. It is
65 * used to speed up invalidating those references when the acp_entry
71 class copy_propagation_state
{
73 DECLARE_RZALLOC_CXX_OPERATORS(copy_propagation_state
);
76 copy_propagation_state
* create(void *mem_ctx
)
78 return new (mem_ctx
) copy_propagation_state(NULL
);
81 copy_propagation_state
* clone()
83 return new (ralloc_parent(this)) copy_propagation_state(this);
88 /* Individual elements were allocated from a linear allocator, so will
89 * be destroyed when the state is destroyed.
91 _mesa_hash_table_clear(acp
, NULL
);
95 void erase(ir_variable
*var
, unsigned write_mask
)
97 acp_entry
*entry
= pull_acp(var
);
98 entry
->rhs_full
= NULL
;
100 for (int i
= 0; i
< 4; i
++) {
101 if (!entry
->rhs_element
[i
])
103 if ((write_mask
& (1 << i
)) == 0)
106 ir_variable
*to_remove
= entry
->rhs_element
[i
];
107 entry
->rhs_element
[i
] = NULL
;
108 remove_unused_var_from_dsts(entry
, var
, to_remove
);
111 /* TODO: Check write mask, and possibly not clear everything. */
113 /* For any usage of our variable on the RHS, clear it out. */
114 struct set_entry
*set_entry
;
115 set_foreach(entry
->dsts
, set_entry
) {
116 ir_variable
*dst_var
= (ir_variable
*)set_entry
->key
;
117 acp_entry
*dst_entry
= pull_acp(dst_var
);
118 for (int i
= 0; i
< 4; i
++) {
119 if (dst_entry
->rhs_element
[i
] == var
)
120 dst_entry
->rhs_element
[i
] = NULL
;
122 if (dst_entry
->rhs_full
== var
)
123 dst_entry
->rhs_full
= NULL
;
124 _mesa_set_remove(entry
->dsts
, set_entry
);
128 acp_entry
*read(ir_variable
*var
)
130 for (copy_propagation_state
*s
= this; s
!= NULL
; s
= s
->fallback
) {
131 hash_entry
*ht_entry
= _mesa_hash_table_search(s
->acp
, var
);
133 return (acp_entry
*) ht_entry
->data
;
138 void write_elements(ir_variable
*lhs
, ir_variable
*rhs
, unsigned write_mask
, int swizzle
[4])
140 acp_entry
*lhs_entry
= pull_acp(lhs
);
141 lhs_entry
->rhs_full
= NULL
;
143 for (int i
= 0; i
< 4; i
++) {
144 if ((write_mask
& (1 << i
)) == 0)
146 ir_variable
*to_remove
= lhs_entry
->rhs_element
[i
];
147 lhs_entry
->rhs_element
[i
] = rhs
;
148 lhs_entry
->rhs_channel
[i
] = swizzle
[i
];
150 remove_unused_var_from_dsts(lhs_entry
, lhs
, to_remove
);
153 acp_entry
*rhs_entry
= pull_acp(rhs
);
154 _mesa_set_add(rhs_entry
->dsts
, lhs
);
157 void write_full(ir_variable
*lhs
, ir_variable
*rhs
)
159 acp_entry
*lhs_entry
= pull_acp(lhs
);
160 if (lhs_entry
->rhs_full
== rhs
)
163 if (lhs_entry
->rhs_full
) {
164 remove_from_dsts(lhs_entry
->rhs_full
, lhs
);
165 } else if (lhs
->type
->is_vector()) {
166 for (int i
= 0; i
< 4; i
++) {
167 if (lhs_entry
->rhs_element
[i
])
168 remove_from_dsts(lhs_entry
->rhs_element
[i
], lhs
);
172 lhs_entry
->rhs_full
= rhs
;
173 acp_entry
*rhs_entry
= pull_acp(rhs
);
174 _mesa_set_add(rhs_entry
->dsts
, lhs
);
176 if (lhs
->type
->is_vector()) {
177 for (int i
= 0; i
< 4; i
++) {
178 lhs_entry
->rhs_element
[i
] = rhs
;
179 lhs_entry
->rhs_channel
[i
] = i
;
184 void remove_unused_var_from_dsts(acp_entry
*lhs_entry
, ir_variable
*lhs
, ir_variable
*var
)
189 /* If lhs still uses var, don't remove anything. */
190 for (int j
= 0; j
< 4; j
++) {
191 if (lhs_entry
->rhs_element
[j
] == var
)
195 acp_entry
*element
= pull_acp(var
);
197 _mesa_set_remove_key(element
->dsts
, lhs
);
201 explicit copy_propagation_state(copy_propagation_state
*fallback
)
203 this->fallback
= fallback
;
204 /* Use 'this' as context for the table, no explicit destruction
207 acp
= _mesa_hash_table_create(this, _mesa_hash_pointer
,
208 _mesa_key_pointer_equal
);
209 lin_ctx
= linear_alloc_parent(this, 0);
212 acp_entry
*pull_acp(ir_variable
*var
)
214 hash_entry
*ht_entry
= _mesa_hash_table_search(acp
, var
);
216 return (acp_entry
*) ht_entry
->data
;
218 /* If not found, create one and copy data from fallback if available. */
219 acp_entry
*entry
= new(lin_ctx
) acp_entry();
220 _mesa_hash_table_insert(acp
, var
, entry
);
223 for (copy_propagation_state
*s
= fallback
; s
!= NULL
; s
= s
->fallback
) {
224 hash_entry
*fallback_ht_entry
= _mesa_hash_table_search(s
->acp
, var
);
225 if (fallback_ht_entry
) {
226 acp_entry
*fallback_entry
= (acp_entry
*) fallback_ht_entry
->data
;
227 *entry
= *fallback_entry
;
228 entry
->dsts
= _mesa_set_clone(fallback_entry
->dsts
, this);
235 entry
->dsts
= _mesa_set_create(this, _mesa_hash_pointer
,
236 _mesa_key_pointer_equal
);
243 remove_from_dsts(ir_variable
*var
, ir_variable
*to_remove
)
245 acp_entry
*entry
= pull_acp(var
);
247 _mesa_set_remove_key(entry
->dsts
, to_remove
);
250 /** Available Copy to Propagate table, from variable to the entry
251 * containing the current sources that can be used. */
254 /** When a state is cloned, entries are copied on demand from fallback. */
255 copy_propagation_state
*fallback
;
260 class kill_entry
: public exec_node
263 /* override operator new from exec_node */
264 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry
)
266 kill_entry(ir_variable
*var
, int write_mask
)
269 this->write_mask
= write_mask
;
273 unsigned int write_mask
;
276 class ir_copy_propagation_elements_visitor
: public ir_rvalue_visitor
{
278 ir_copy_propagation_elements_visitor()
280 this->progress
= false;
281 this->killed_all
= false;
282 this->mem_ctx
= ralloc_context(NULL
);
283 this->lin_ctx
= linear_alloc_parent(this->mem_ctx
, 0);
284 this->shader_mem_ctx
= NULL
;
285 this->kills
= new(mem_ctx
) exec_list
;
286 this->state
= copy_propagation_state::create(mem_ctx
);
288 ~ir_copy_propagation_elements_visitor()
290 ralloc_free(mem_ctx
);
293 virtual ir_visitor_status
visit(ir_dereference_variable
*);
295 void handle_loop(ir_loop
*, bool keep_acp
);
296 virtual ir_visitor_status
visit_enter(class ir_loop
*);
297 virtual ir_visitor_status
visit_enter(class ir_function_signature
*);
298 virtual ir_visitor_status
visit_leave(class ir_assignment
*);
299 virtual ir_visitor_status
visit_enter(class ir_call
*);
300 virtual ir_visitor_status
visit_enter(class ir_if
*);
301 virtual ir_visitor_status
visit_leave(class ir_swizzle
*);
303 void handle_rvalue(ir_rvalue
**rvalue
);
305 void add_copy(ir_assignment
*ir
);
306 void kill(kill_entry
*k
);
307 void handle_if_block(exec_list
*instructions
, exec_list
*kills
, bool *killed_all
);
309 copy_propagation_state
*state
;
312 * List of kill_entry: The variables whose values were killed in this
321 /* Context for our local data structures. */
324 /* Context for allocating new shader nodes. */
325 void *shader_mem_ctx
;
328 } /* unnamed namespace */
331 ir_copy_propagation_elements_visitor::visit(ir_dereference_variable
*ir
)
333 if (this->in_assignee
)
334 return visit_continue
;
336 const acp_entry
*entry
= state
->read(ir
->var
);
337 if (entry
&& entry
->rhs_full
) {
338 ir
->var
= (ir_variable
*) entry
->rhs_full
;
342 return visit_continue
;
346 ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature
*ir
)
348 /* Treat entry into a function signature as a completely separate
349 * block. Any instructions at global scope will be shuffled into
350 * main() at link time, so they're irrelevant to us.
352 exec_list
*orig_kills
= this->kills
;
353 bool orig_killed_all
= this->killed_all
;
355 this->kills
= new(mem_ctx
) exec_list
;
356 this->killed_all
= false;
358 copy_propagation_state
*orig_state
= state
;
359 this->state
= copy_propagation_state::create(mem_ctx
);
361 visit_list_elements(this, &ir
->body
);
364 this->state
= orig_state
;
366 ralloc_free(this->kills
);
367 this->kills
= orig_kills
;
368 this->killed_all
= orig_killed_all
;
370 return visit_continue_with_parent
;
374 ir_copy_propagation_elements_visitor::visit_leave(ir_assignment
*ir
)
376 ir_dereference_variable
*lhs
= ir
->lhs
->as_dereference_variable();
377 ir_variable
*var
= ir
->lhs
->variable_referenced();
381 if (lhs
&& var
->type
->is_vector())
382 k
= new(this->lin_ctx
) kill_entry(var
, ir
->write_mask
);
384 k
= new(this->lin_ctx
) kill_entry(var
, ~0);
390 return visit_continue
;
394 ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle
*)
396 /* Don't visit the values of swizzles since they are handled while
397 * visiting the swizzle itself.
399 return visit_continue
;
403 * Replaces dereferences of ACP RHS variables with ACP LHS variables.
405 * This is where the actual copy propagation occurs. Note that the
406 * rewriting of ir_dereference means that the ir_dereference instance
407 * must not be shared by multiple IR operations!
410 ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue
**ir
)
413 ir_dereference_variable
*deref_var
;
414 ir_variable
*source
[4] = {NULL
, NULL
, NULL
, NULL
};
415 int source_chan
[4] = {0, 0, 0, 0};
417 bool noop_swizzle
= true;
422 ir_swizzle
*swizzle
= (*ir
)->as_swizzle();
424 deref_var
= swizzle
->val
->as_dereference_variable();
428 swizzle_chan
[0] = swizzle
->mask
.x
;
429 swizzle_chan
[1] = swizzle
->mask
.y
;
430 swizzle_chan
[2] = swizzle
->mask
.z
;
431 swizzle_chan
[3] = swizzle
->mask
.w
;
432 chans
= swizzle
->type
->vector_elements
;
434 deref_var
= (*ir
)->as_dereference_variable();
442 chans
= deref_var
->type
->vector_elements
;
445 if (this->in_assignee
)
448 ir_variable
*var
= deref_var
->var
;
450 /* Try to find ACP entries covering swizzle_chan[], hoping they're
451 * the same source variable.
454 const acp_entry
*entry
= state
->read(var
);
456 for (int c
= 0; c
< chans
; c
++) {
457 unsigned index
= swizzle_chan
[c
];
458 ir_variable
*src
= entry
->rhs_element
[index
];
462 source_chan
[c
] = entry
->rhs_channel
[index
];
463 if (source_chan
[c
] != swizzle_chan
[c
])
464 noop_swizzle
= false;
468 /* Make sure all channels are copying from the same source variable. */
471 for (int c
= 1; c
< chans
; c
++) {
472 if (source
[c
] != source
[0])
477 shader_mem_ctx
= ralloc_parent(deref_var
);
479 /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
480 * of itself, which would just be deleted by opt_noop_swizzle).
482 if (source
[0] == var
&& noop_swizzle
)
486 printf("Copy propagation from:\n");
490 deref_var
= new(shader_mem_ctx
) ir_dereference_variable(source
[0]);
491 *ir
= new(shader_mem_ctx
) ir_swizzle(deref_var
,
508 ir_copy_propagation_elements_visitor::visit_enter(ir_call
*ir
)
510 /* Do copy propagation on call parameters, but skip any out params */
511 foreach_two_lists(formal_node
, &ir
->callee
->parameters
,
512 actual_node
, &ir
->actual_parameters
) {
513 ir_variable
*sig_param
= (ir_variable
*) formal_node
;
514 ir_rvalue
*ir
= (ir_rvalue
*) actual_node
;
515 if (sig_param
->data
.mode
!= ir_var_function_out
516 && sig_param
->data
.mode
!= ir_var_function_inout
) {
521 if (!ir
->callee
->is_intrinsic()) {
523 this->killed_all
= true;
525 if (ir
->return_deref
) {
526 kill(new(this->lin_ctx
) kill_entry(ir
->return_deref
->var
, ~0));
529 foreach_two_lists(formal_node
, &ir
->callee
->parameters
,
530 actual_node
, &ir
->actual_parameters
) {
531 ir_variable
*sig_param
= (ir_variable
*) formal_node
;
532 if (sig_param
->data
.mode
== ir_var_function_out
||
533 sig_param
->data
.mode
== ir_var_function_inout
) {
534 ir_rvalue
*ir
= (ir_rvalue
*) actual_node
;
535 ir_variable
*var
= ir
->variable_referenced();
536 kill(new(this->lin_ctx
) kill_entry(var
, ~0));
541 return visit_continue_with_parent
;
545 ir_copy_propagation_elements_visitor::handle_if_block(exec_list
*instructions
, exec_list
*kills
, bool *killed_all
)
547 exec_list
*orig_kills
= this->kills
;
548 bool orig_killed_all
= this->killed_all
;
551 this->killed_all
= false;
553 /* Populate the initial acp with a copy of the original */
554 copy_propagation_state
*orig_state
= state
;
555 this->state
= orig_state
->clone();
557 visit_list_elements(this, instructions
);
560 this->state
= orig_state
;
562 *killed_all
= this->killed_all
;
563 this->kills
= orig_kills
;
564 this->killed_all
= orig_killed_all
;
568 ir_copy_propagation_elements_visitor::visit_enter(ir_if
*ir
)
570 ir
->condition
->accept(this);
572 exec_list
*new_kills
= new(mem_ctx
) exec_list
;
573 bool then_killed_all
= false;
574 bool else_killed_all
= false;
576 handle_if_block(&ir
->then_instructions
, new_kills
, &then_killed_all
);
577 handle_if_block(&ir
->else_instructions
, new_kills
, &else_killed_all
);
579 if (then_killed_all
|| else_killed_all
) {
583 foreach_in_list_safe(kill_entry
, k
, new_kills
)
587 ralloc_free(new_kills
);
589 /* handle_if_block() already descended into the children. */
590 return visit_continue_with_parent
;
594 ir_copy_propagation_elements_visitor::handle_loop(ir_loop
*ir
, bool keep_acp
)
596 exec_list
*orig_kills
= this->kills
;
597 bool orig_killed_all
= this->killed_all
;
599 this->kills
= new(mem_ctx
) exec_list
;
600 this->killed_all
= false;
602 copy_propagation_state
*orig_state
= state
;
605 /* Populate the initial acp with a copy of the original */
606 this->state
= orig_state
->clone();
608 this->state
= copy_propagation_state::create(mem_ctx
);
611 visit_list_elements(this, &ir
->body_instructions
);
614 this->state
= orig_state
;
616 if (this->killed_all
)
617 this->state
->erase_all();
619 exec_list
*new_kills
= this->kills
;
620 this->kills
= orig_kills
;
621 this->killed_all
= this->killed_all
|| orig_killed_all
;
623 foreach_in_list_safe(kill_entry
, k
, new_kills
) {
627 ralloc_free(new_kills
);
631 ir_copy_propagation_elements_visitor::visit_enter(ir_loop
*ir
)
633 handle_loop(ir
, false);
634 handle_loop(ir
, true);
636 /* already descended into the children. */
637 return visit_continue_with_parent
;
640 /* Remove any entries currently in the ACP for this kill. */
642 ir_copy_propagation_elements_visitor::kill(kill_entry
*k
)
644 state
->erase(k
->var
, k
->write_mask
);
646 /* If we were on a list, remove ourselves before inserting */
650 this->kills
->push_tail(k
);
654 * Adds directly-copied channels between vector variables to the available
655 * copy propagation list.
658 ir_copy_propagation_elements_visitor::add_copy(ir_assignment
*ir
)
664 ir_variable
*lhs_var
= ir
->whole_variable_written();
665 ir_dereference_variable
*rhs
= ir
->rhs
->as_dereference_variable();
667 if (lhs_var
!= NULL
&& rhs
&& rhs
->var
!= NULL
&& lhs_var
!= rhs
->var
) {
668 if (lhs_var
->data
.mode
== ir_var_shader_storage
||
669 lhs_var
->data
.mode
== ir_var_shader_shared
||
670 rhs
->var
->data
.mode
== ir_var_shader_storage
||
671 rhs
->var
->data
.mode
== ir_var_shader_shared
||
672 lhs_var
->data
.precise
!= rhs
->var
->data
.precise
) {
675 state
->write_full(lhs_var
, rhs
->var
);
680 int orig_swizzle
[4] = {0, 1, 2, 3};
683 ir_dereference_variable
*lhs
= ir
->lhs
->as_dereference_variable();
684 if (!lhs
|| !(lhs
->type
->is_scalar() || lhs
->type
->is_vector()))
687 if (lhs
->var
->data
.mode
== ir_var_shader_storage
||
688 lhs
->var
->data
.mode
== ir_var_shader_shared
)
691 ir_dereference_variable
*rhs
= ir
->rhs
->as_dereference_variable();
693 ir_swizzle
*swiz
= ir
->rhs
->as_swizzle();
697 rhs
= swiz
->val
->as_dereference_variable();
701 orig_swizzle
[0] = swiz
->mask
.x
;
702 orig_swizzle
[1] = swiz
->mask
.y
;
703 orig_swizzle
[2] = swiz
->mask
.z
;
704 orig_swizzle
[3] = swiz
->mask
.w
;
707 if (rhs
->var
->data
.mode
== ir_var_shader_storage
||
708 rhs
->var
->data
.mode
== ir_var_shader_shared
)
711 /* Move the swizzle channels out to the positions they match in the
712 * destination. We don't want to have to rewrite the swizzle[]
713 * array every time we clear a bit of the write_mask.
716 for (int i
= 0; i
< 4; i
++) {
717 if (ir
->write_mask
& (1 << i
))
718 swizzle
[i
] = orig_swizzle
[j
++];
721 int write_mask
= ir
->write_mask
;
722 if (lhs
->var
== rhs
->var
) {
723 /* If this is a copy from the variable to itself, then we need
724 * to be sure not to include the updated channels from this
725 * instruction in the set of new source channels to be
726 * copy-propagated from.
728 for (int i
= 0; i
< 4; i
++) {
729 if (ir
->write_mask
& (1 << orig_swizzle
[i
]))
730 write_mask
&= ~(1 << i
);
734 if (lhs
->var
->data
.precise
!= rhs
->var
->data
.precise
)
737 state
->write_elements(lhs
->var
, rhs
->var
, write_mask
, swizzle
);
741 do_copy_propagation_elements(exec_list
*instructions
)
743 ir_copy_propagation_elements_visitor v
;
745 visit_list_elements(&v
, instructions
);