2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_copy_propagation_elements.cpp
27 * Replaces usage of recently-copied components of variables with the
28 * previous copy of the variable.
30 * This should reduce the number of MOV instructions in the generated
31 * programs and help triggering other optimizations that live in GLSL
36 #include "ir_rvalue_visitor.h"
37 #include "ir_basic_block.h"
38 #include "ir_optimization.h"
39 #include "compiler/glsl_types.h"
40 #include "util/hash_table.h"
43 static bool debug
= false;
50 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry
)
52 /* If set, rhs_full indicates that this ACP entry represents a
53 * whole-variable copy. The rhs_element[] array will still be filled,
54 * to allow the swizzling from its components in case the variable
55 * was a vector (and to simplify some of the erase() and write_vector()
59 ir_variable
*rhs_full
;
60 ir_variable
*rhs_element
[4];
61 unsigned rhs_channel
[4];
63 /* Set of variables that use the variable associated with this acp_entry as
64 * RHS. This has the "reverse references" of rhs_full/rhs_element. It is
65 * used to speed up invalidating those references when the acp_entry
71 class copy_propagation_state
{
73 DECLARE_RZALLOC_CXX_OPERATORS(copy_propagation_state
);
76 copy_propagation_state
* create(void *mem_ctx
)
78 return new (mem_ctx
) copy_propagation_state(NULL
);
81 copy_propagation_state
* clone()
83 return new (ralloc_parent(this)) copy_propagation_state(this);
88 /* Individual elements were allocated from a linear allocator, so will
89 * be destroyed when the state is destroyed.
91 _mesa_hash_table_clear(acp
, NULL
);
95 void erase(ir_variable
*var
, unsigned write_mask
)
97 acp_entry
*entry
= pull_acp(var
);
98 entry
->rhs_full
= NULL
;
100 for (int i
= 0; i
< 4; i
++) {
101 if (!entry
->rhs_element
[i
])
103 if ((write_mask
& (1 << i
)) == 0)
106 ir_variable
*to_remove
= entry
->rhs_element
[i
];
107 entry
->rhs_element
[i
] = NULL
;
108 remove_unused_var_from_dsts(entry
, var
, to_remove
);
111 /* TODO: Check write mask, and possibly not clear everything. */
113 /* For any usage of our variable on the RHS, clear it out. */
114 set_foreach(entry
->dsts
, set_entry
) {
115 ir_variable
*dst_var
= (ir_variable
*)set_entry
->key
;
116 acp_entry
*dst_entry
= pull_acp(dst_var
);
117 for (int i
= 0; i
< 4; i
++) {
118 if (dst_entry
->rhs_element
[i
] == var
)
119 dst_entry
->rhs_element
[i
] = NULL
;
121 if (dst_entry
->rhs_full
== var
)
122 dst_entry
->rhs_full
= NULL
;
123 _mesa_set_remove(entry
->dsts
, set_entry
);
127 acp_entry
*read(ir_variable
*var
)
129 for (copy_propagation_state
*s
= this; s
!= NULL
; s
= s
->fallback
) {
130 hash_entry
*ht_entry
= _mesa_hash_table_search(s
->acp
, var
);
132 return (acp_entry
*) ht_entry
->data
;
137 void write_elements(ir_variable
*lhs
, ir_variable
*rhs
, unsigned write_mask
, int swizzle
[4])
139 acp_entry
*lhs_entry
= pull_acp(lhs
);
140 lhs_entry
->rhs_full
= NULL
;
142 for (int i
= 0; i
< 4; i
++) {
143 if ((write_mask
& (1 << i
)) == 0)
145 ir_variable
*to_remove
= lhs_entry
->rhs_element
[i
];
146 lhs_entry
->rhs_element
[i
] = rhs
;
147 lhs_entry
->rhs_channel
[i
] = swizzle
[i
];
149 remove_unused_var_from_dsts(lhs_entry
, lhs
, to_remove
);
152 acp_entry
*rhs_entry
= pull_acp(rhs
);
153 _mesa_set_add(rhs_entry
->dsts
, lhs
);
156 void write_full(ir_variable
*lhs
, ir_variable
*rhs
)
158 acp_entry
*lhs_entry
= pull_acp(lhs
);
159 if (lhs_entry
->rhs_full
== rhs
)
162 if (lhs_entry
->rhs_full
) {
163 remove_from_dsts(lhs_entry
->rhs_full
, lhs
);
164 } else if (lhs
->type
->is_vector()) {
165 for (int i
= 0; i
< 4; i
++) {
166 if (lhs_entry
->rhs_element
[i
])
167 remove_from_dsts(lhs_entry
->rhs_element
[i
], lhs
);
171 lhs_entry
->rhs_full
= rhs
;
172 acp_entry
*rhs_entry
= pull_acp(rhs
);
173 _mesa_set_add(rhs_entry
->dsts
, lhs
);
175 if (lhs
->type
->is_vector()) {
176 for (int i
= 0; i
< 4; i
++) {
177 lhs_entry
->rhs_element
[i
] = rhs
;
178 lhs_entry
->rhs_channel
[i
] = i
;
183 void remove_unused_var_from_dsts(acp_entry
*lhs_entry
, ir_variable
*lhs
, ir_variable
*var
)
188 /* If lhs still uses var, don't remove anything. */
189 for (int j
= 0; j
< 4; j
++) {
190 if (lhs_entry
->rhs_element
[j
] == var
)
194 acp_entry
*element
= pull_acp(var
);
196 _mesa_set_remove_key(element
->dsts
, lhs
);
200 explicit copy_propagation_state(copy_propagation_state
*fallback
)
202 this->fallback
= fallback
;
203 /* Use 'this' as context for the table, no explicit destruction
206 acp
= _mesa_pointer_hash_table_create(this);
207 lin_ctx
= linear_alloc_parent(this, 0);
210 acp_entry
*pull_acp(ir_variable
*var
)
212 hash_entry
*ht_entry
= _mesa_hash_table_search(acp
, var
);
214 return (acp_entry
*) ht_entry
->data
;
216 /* If not found, create one and copy data from fallback if available. */
217 acp_entry
*entry
= new(lin_ctx
) acp_entry();
218 _mesa_hash_table_insert(acp
, var
, entry
);
221 for (copy_propagation_state
*s
= fallback
; s
!= NULL
; s
= s
->fallback
) {
222 hash_entry
*fallback_ht_entry
= _mesa_hash_table_search(s
->acp
, var
);
223 if (fallback_ht_entry
) {
224 acp_entry
*fallback_entry
= (acp_entry
*) fallback_ht_entry
->data
;
225 *entry
= *fallback_entry
;
226 entry
->dsts
= _mesa_set_clone(fallback_entry
->dsts
, this);
233 entry
->dsts
= _mesa_pointer_set_create(this);
240 remove_from_dsts(ir_variable
*var
, ir_variable
*to_remove
)
242 acp_entry
*entry
= pull_acp(var
);
244 _mesa_set_remove_key(entry
->dsts
, to_remove
);
247 /** Available Copy to Propagate table, from variable to the entry
248 * containing the current sources that can be used. */
251 /** When a state is cloned, entries are copied on demand from fallback. */
252 copy_propagation_state
*fallback
;
257 class kill_entry
: public exec_node
260 /* override operator new from exec_node */
261 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry
)
263 kill_entry(ir_variable
*var
, int write_mask
)
266 this->write_mask
= write_mask
;
270 unsigned int write_mask
;
273 class ir_copy_propagation_elements_visitor
: public ir_rvalue_visitor
{
275 ir_copy_propagation_elements_visitor()
277 this->progress
= false;
278 this->killed_all
= false;
279 this->mem_ctx
= ralloc_context(NULL
);
280 this->lin_ctx
= linear_alloc_parent(this->mem_ctx
, 0);
281 this->shader_mem_ctx
= NULL
;
282 this->kills
= new(mem_ctx
) exec_list
;
283 this->state
= copy_propagation_state::create(mem_ctx
);
285 ~ir_copy_propagation_elements_visitor()
287 ralloc_free(mem_ctx
);
290 virtual ir_visitor_status
visit(ir_dereference_variable
*);
292 void handle_loop(ir_loop
*, bool keep_acp
);
293 virtual ir_visitor_status
visit_enter(class ir_loop
*);
294 virtual ir_visitor_status
visit_enter(class ir_function_signature
*);
295 virtual ir_visitor_status
visit_leave(class ir_assignment
*);
296 virtual ir_visitor_status
visit_enter(class ir_call
*);
297 virtual ir_visitor_status
visit_enter(class ir_if
*);
298 virtual ir_visitor_status
visit_leave(class ir_swizzle
*);
300 void handle_rvalue(ir_rvalue
**rvalue
);
302 void add_copy(ir_assignment
*ir
);
303 void kill(kill_entry
*k
);
304 void handle_if_block(exec_list
*instructions
, exec_list
*kills
, bool *killed_all
);
306 copy_propagation_state
*state
;
309 * List of kill_entry: The variables whose values were killed in this
318 /* Context for our local data structures. */
321 /* Context for allocating new shader nodes. */
322 void *shader_mem_ctx
;
325 } /* unnamed namespace */
328 ir_copy_propagation_elements_visitor::visit(ir_dereference_variable
*ir
)
330 if (this->in_assignee
)
331 return visit_continue
;
333 const acp_entry
*entry
= state
->read(ir
->var
);
334 if (entry
&& entry
->rhs_full
) {
335 ir
->var
= (ir_variable
*) entry
->rhs_full
;
339 return visit_continue
;
343 ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature
*ir
)
345 /* Treat entry into a function signature as a completely separate
346 * block. Any instructions at global scope will be shuffled into
347 * main() at link time, so they're irrelevant to us.
349 exec_list
*orig_kills
= this->kills
;
350 bool orig_killed_all
= this->killed_all
;
352 this->kills
= new(mem_ctx
) exec_list
;
353 this->killed_all
= false;
355 copy_propagation_state
*orig_state
= state
;
356 this->state
= copy_propagation_state::create(mem_ctx
);
358 visit_list_elements(this, &ir
->body
);
361 this->state
= orig_state
;
363 ralloc_free(this->kills
);
364 this->kills
= orig_kills
;
365 this->killed_all
= orig_killed_all
;
367 return visit_continue_with_parent
;
371 ir_copy_propagation_elements_visitor::visit_leave(ir_assignment
*ir
)
373 ir_dereference_variable
*lhs
= ir
->lhs
->as_dereference_variable();
374 ir_variable
*var
= ir
->lhs
->variable_referenced();
378 if (lhs
&& var
->type
->is_vector())
379 k
= new(this->lin_ctx
) kill_entry(var
, ir
->write_mask
);
381 k
= new(this->lin_ctx
) kill_entry(var
, ~0);
387 return visit_continue
;
391 ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle
*)
393 /* Don't visit the values of swizzles since they are handled while
394 * visiting the swizzle itself.
396 return visit_continue
;
400 * Replaces dereferences of ACP RHS variables with ACP LHS variables.
402 * This is where the actual copy propagation occurs. Note that the
403 * rewriting of ir_dereference means that the ir_dereference instance
404 * must not be shared by multiple IR operations!
407 ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue
**ir
)
410 ir_dereference_variable
*deref_var
;
411 ir_variable
*source
[4] = {NULL
, NULL
, NULL
, NULL
};
412 int source_chan
[4] = {0, 0, 0, 0};
414 bool noop_swizzle
= true;
419 ir_swizzle
*swizzle
= (*ir
)->as_swizzle();
421 deref_var
= swizzle
->val
->as_dereference_variable();
425 swizzle_chan
[0] = swizzle
->mask
.x
;
426 swizzle_chan
[1] = swizzle
->mask
.y
;
427 swizzle_chan
[2] = swizzle
->mask
.z
;
428 swizzle_chan
[3] = swizzle
->mask
.w
;
429 chans
= swizzle
->type
->vector_elements
;
431 deref_var
= (*ir
)->as_dereference_variable();
439 chans
= deref_var
->type
->vector_elements
;
442 if (this->in_assignee
)
445 ir_variable
*var
= deref_var
->var
;
447 /* Try to find ACP entries covering swizzle_chan[], hoping they're
448 * the same source variable.
451 const acp_entry
*entry
= state
->read(var
);
453 for (int c
= 0; c
< chans
; c
++) {
454 unsigned index
= swizzle_chan
[c
];
455 ir_variable
*src
= entry
->rhs_element
[index
];
459 source_chan
[c
] = entry
->rhs_channel
[index
];
460 if (source_chan
[c
] != swizzle_chan
[c
])
461 noop_swizzle
= false;
465 /* Make sure all channels are copying from the same source variable. */
468 for (int c
= 1; c
< chans
; c
++) {
469 if (source
[c
] != source
[0])
474 shader_mem_ctx
= ralloc_parent(deref_var
);
476 /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
477 * of itself, which would just be deleted by opt_noop_swizzle).
479 if (source
[0] == var
&& noop_swizzle
)
483 printf("Copy propagation from:\n");
487 deref_var
= new(shader_mem_ctx
) ir_dereference_variable(source
[0]);
488 *ir
= new(shader_mem_ctx
) ir_swizzle(deref_var
,
505 ir_copy_propagation_elements_visitor::visit_enter(ir_call
*ir
)
507 /* Do copy propagation on call parameters, but skip any out params */
508 foreach_two_lists(formal_node
, &ir
->callee
->parameters
,
509 actual_node
, &ir
->actual_parameters
) {
510 ir_variable
*sig_param
= (ir_variable
*) formal_node
;
511 ir_rvalue
*ir
= (ir_rvalue
*) actual_node
;
512 if (sig_param
->data
.mode
!= ir_var_function_out
513 && sig_param
->data
.mode
!= ir_var_function_inout
) {
518 if (!ir
->callee
->is_intrinsic()) {
520 this->killed_all
= true;
522 if (ir
->return_deref
) {
523 kill(new(this->lin_ctx
) kill_entry(ir
->return_deref
->var
, ~0));
526 foreach_two_lists(formal_node
, &ir
->callee
->parameters
,
527 actual_node
, &ir
->actual_parameters
) {
528 ir_variable
*sig_param
= (ir_variable
*) formal_node
;
529 if (sig_param
->data
.mode
== ir_var_function_out
||
530 sig_param
->data
.mode
== ir_var_function_inout
) {
531 ir_rvalue
*ir
= (ir_rvalue
*) actual_node
;
532 ir_variable
*var
= ir
->variable_referenced();
533 kill(new(this->lin_ctx
) kill_entry(var
, ~0));
538 return visit_continue_with_parent
;
542 ir_copy_propagation_elements_visitor::handle_if_block(exec_list
*instructions
, exec_list
*kills
, bool *killed_all
)
544 exec_list
*orig_kills
= this->kills
;
545 bool orig_killed_all
= this->killed_all
;
548 this->killed_all
= false;
550 /* Populate the initial acp with a copy of the original */
551 copy_propagation_state
*orig_state
= state
;
552 this->state
= orig_state
->clone();
554 visit_list_elements(this, instructions
);
557 this->state
= orig_state
;
559 *killed_all
= this->killed_all
;
560 this->kills
= orig_kills
;
561 this->killed_all
= orig_killed_all
;
565 ir_copy_propagation_elements_visitor::visit_enter(ir_if
*ir
)
567 ir
->condition
->accept(this);
569 exec_list
*new_kills
= new(mem_ctx
) exec_list
;
570 bool then_killed_all
= false;
571 bool else_killed_all
= false;
573 handle_if_block(&ir
->then_instructions
, new_kills
, &then_killed_all
);
574 handle_if_block(&ir
->else_instructions
, new_kills
, &else_killed_all
);
576 if (then_killed_all
|| else_killed_all
) {
580 foreach_in_list_safe(kill_entry
, k
, new_kills
)
584 ralloc_free(new_kills
);
586 /* handle_if_block() already descended into the children. */
587 return visit_continue_with_parent
;
591 ir_copy_propagation_elements_visitor::handle_loop(ir_loop
*ir
, bool keep_acp
)
593 exec_list
*orig_kills
= this->kills
;
594 bool orig_killed_all
= this->killed_all
;
596 this->kills
= new(mem_ctx
) exec_list
;
597 this->killed_all
= false;
599 copy_propagation_state
*orig_state
= state
;
602 /* Populate the initial acp with a copy of the original */
603 this->state
= orig_state
->clone();
605 this->state
= copy_propagation_state::create(mem_ctx
);
608 visit_list_elements(this, &ir
->body_instructions
);
611 this->state
= orig_state
;
613 if (this->killed_all
)
614 this->state
->erase_all();
616 exec_list
*new_kills
= this->kills
;
617 this->kills
= orig_kills
;
618 this->killed_all
= this->killed_all
|| orig_killed_all
;
620 foreach_in_list_safe(kill_entry
, k
, new_kills
) {
624 ralloc_free(new_kills
);
628 ir_copy_propagation_elements_visitor::visit_enter(ir_loop
*ir
)
630 handle_loop(ir
, false);
631 handle_loop(ir
, true);
633 /* already descended into the children. */
634 return visit_continue_with_parent
;
637 /* Remove any entries currently in the ACP for this kill. */
639 ir_copy_propagation_elements_visitor::kill(kill_entry
*k
)
641 state
->erase(k
->var
, k
->write_mask
);
643 /* If we were on a list, remove ourselves before inserting */
647 this->kills
->push_tail(k
);
651 * Adds directly-copied channels between vector variables to the available
652 * copy propagation list.
655 ir_copy_propagation_elements_visitor::add_copy(ir_assignment
*ir
)
661 ir_variable
*lhs_var
= ir
->whole_variable_written();
662 ir_dereference_variable
*rhs
= ir
->rhs
->as_dereference_variable();
664 if (lhs_var
!= NULL
&& rhs
&& rhs
->var
!= NULL
&& lhs_var
!= rhs
->var
) {
665 if (lhs_var
->data
.mode
== ir_var_shader_storage
||
666 lhs_var
->data
.mode
== ir_var_shader_shared
||
667 rhs
->var
->data
.mode
== ir_var_shader_storage
||
668 rhs
->var
->data
.mode
== ir_var_shader_shared
||
669 lhs_var
->data
.precise
!= rhs
->var
->data
.precise
) {
672 state
->write_full(lhs_var
, rhs
->var
);
677 int orig_swizzle
[4] = {0, 1, 2, 3};
680 ir_dereference_variable
*lhs
= ir
->lhs
->as_dereference_variable();
681 if (!lhs
|| !(lhs
->type
->is_scalar() || lhs
->type
->is_vector()))
684 if (lhs
->var
->data
.mode
== ir_var_shader_storage
||
685 lhs
->var
->data
.mode
== ir_var_shader_shared
)
688 ir_dereference_variable
*rhs
= ir
->rhs
->as_dereference_variable();
690 ir_swizzle
*swiz
= ir
->rhs
->as_swizzle();
694 rhs
= swiz
->val
->as_dereference_variable();
698 orig_swizzle
[0] = swiz
->mask
.x
;
699 orig_swizzle
[1] = swiz
->mask
.y
;
700 orig_swizzle
[2] = swiz
->mask
.z
;
701 orig_swizzle
[3] = swiz
->mask
.w
;
704 if (rhs
->var
->data
.mode
== ir_var_shader_storage
||
705 rhs
->var
->data
.mode
== ir_var_shader_shared
)
708 /* Move the swizzle channels out to the positions they match in the
709 * destination. We don't want to have to rewrite the swizzle[]
710 * array every time we clear a bit of the write_mask.
713 for (int i
= 0; i
< 4; i
++) {
714 if (ir
->write_mask
& (1 << i
))
715 swizzle
[i
] = orig_swizzle
[j
++];
718 int write_mask
= ir
->write_mask
;
719 if (lhs
->var
== rhs
->var
) {
720 /* If this is a copy from the variable to itself, then we need
721 * to be sure not to include the updated channels from this
722 * instruction in the set of new source channels to be
723 * copy-propagated from.
725 for (int i
= 0; i
< 4; i
++) {
726 if (ir
->write_mask
& (1 << orig_swizzle
[i
]))
727 write_mask
&= ~(1 << i
);
731 if (lhs
->var
->data
.precise
!= rhs
->var
->data
.precise
)
734 state
->write_elements(lhs
->var
, rhs
->var
, write_mask
, swizzle
);
738 do_copy_propagation_elements(exec_list
*instructions
)
740 ir_copy_propagation_elements_visitor v
;
742 visit_list_elements(&v
, instructions
);