2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_copy_propagation_elements.cpp
27 * Replaces usage of recently-copied components of variables with the
28 * previous copy of the variable.
30 * This pass can be compared with opt_copy_propagation, which operands
31 * on arbitrary whole-variable copies. However, in order to handle
32 * the copy propagation of swizzled variables or writemasked writes,
33 * we want to track things on a channel-wise basis. I found that
34 * trying to mix the swizzled/writemasked support here with the
35 * whole-variable stuff in opt_copy_propagation.cpp just made a mess,
36 * so this is separate despite the ACP handling being somewhat
39 * This should reduce the number of MOV instructions in the generated
40 * programs unless copy propagation is also done on the LIR, and may
41 * help anyway by triggering other optimizations that live in the HIR.
45 #include "ir_rvalue_visitor.h"
46 #include "ir_basic_block.h"
47 #include "ir_optimization.h"
48 #include "compiler/glsl_types.h"
49 #include "util/hash_table.h"
52 static bool debug
= false;
59 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry
)
61 ir_variable
*rhs_element
[4];
62 unsigned rhs_channel
[4];
67 class copy_propagation_state
{
69 DECLARE_RZALLOC_CXX_OPERATORS(copy_propagation_state
);
72 copy_propagation_state
* create(void *mem_ctx
)
74 return new (mem_ctx
) copy_propagation_state(NULL
);
77 copy_propagation_state
* clone()
79 return new (ralloc_parent(this)) copy_propagation_state(this);
84 /* Individual elements were allocated from a linear allocator, so will
85 * be destroyed when the state is destroyed.
87 _mesa_hash_table_clear(acp
, NULL
);
91 void erase(ir_variable
*var
, unsigned write_mask
)
93 acp_entry
*entry
= pull_acp(var
);
95 for (int i
= 0; i
< 4; i
++) {
96 if (!entry
->rhs_element
[i
])
98 if ((write_mask
& (1 << i
)) == 0)
101 ir_variable
*to_remove
= entry
->rhs_element
[i
];
102 entry
->rhs_element
[i
] = NULL
;
103 remove_unused_var_from_dsts(entry
, var
, to_remove
);
106 /* TODO: Check write mask, and possibly not clear everything. */
108 /* For any usage of our variable on the RHS, clear it out. */
109 struct set_entry
*set_entry
;
110 set_foreach(entry
->dsts
, set_entry
) {
111 ir_variable
*dst_var
= (ir_variable
*)set_entry
->key
;
112 acp_entry
*dst_entry
= pull_acp(dst_var
);
113 for (int i
= 0; i
< 4; i
++) {
114 if (dst_entry
->rhs_element
[i
] == var
)
115 dst_entry
->rhs_element
[i
] = NULL
;
117 _mesa_set_remove(entry
->dsts
, set_entry
);
121 acp_entry
*read(ir_variable
*var
)
123 for (copy_propagation_state
*s
= this; s
!= NULL
; s
= s
->fallback
) {
124 hash_entry
*ht_entry
= _mesa_hash_table_search(s
->acp
, var
);
126 return (acp_entry
*) ht_entry
->data
;
131 void write(ir_variable
*lhs
, ir_variable
*rhs
, unsigned write_mask
, int swizzle
[4])
133 acp_entry
*lhs_entry
= pull_acp(lhs
);
135 for (int i
= 0; i
< 4; i
++) {
136 if ((write_mask
& (1 << i
)) == 0)
138 ir_variable
*to_remove
= lhs_entry
->rhs_element
[i
];
139 lhs_entry
->rhs_element
[i
] = rhs
;
140 lhs_entry
->rhs_channel
[i
] = swizzle
[i
];
142 remove_unused_var_from_dsts(lhs_entry
, lhs
, to_remove
);
145 acp_entry
*rhs_entry
= pull_acp(rhs
);
146 _mesa_set_add(rhs_entry
->dsts
, lhs
);
149 void remove_unused_var_from_dsts(acp_entry
*lhs_entry
, ir_variable
*lhs
, ir_variable
*var
)
154 /* If lhs still uses var, don't remove anything. */
155 for (int j
= 0; j
< 4; j
++) {
156 if (lhs_entry
->rhs_element
[j
] == var
)
160 acp_entry
*element
= pull_acp(var
);
162 _mesa_set_remove_key(element
->dsts
, lhs
);
166 explicit copy_propagation_state(copy_propagation_state
*fallback
)
168 this->fallback
= fallback
;
169 /* Use 'this' as context for the table, no explicit destruction
172 acp
= _mesa_hash_table_create(this, _mesa_hash_pointer
,
173 _mesa_key_pointer_equal
);
174 lin_ctx
= linear_alloc_parent(this, 0);
177 acp_entry
*pull_acp(ir_variable
*var
)
179 hash_entry
*ht_entry
= _mesa_hash_table_search(acp
, var
);
181 return (acp_entry
*) ht_entry
->data
;
183 /* If not found, create one and copy data from fallback if available. */
184 acp_entry
*entry
= new(lin_ctx
) acp_entry();
185 _mesa_hash_table_insert(acp
, var
, entry
);
188 for (copy_propagation_state
*s
= fallback
; s
!= NULL
; s
= s
->fallback
) {
189 hash_entry
*fallback_ht_entry
= _mesa_hash_table_search(s
->acp
, var
);
190 if (fallback_ht_entry
) {
191 acp_entry
*fallback_entry
= (acp_entry
*) fallback_ht_entry
->data
;
192 *entry
= *fallback_entry
;
193 entry
->dsts
= _mesa_set_clone(fallback_entry
->dsts
, this);
200 entry
->dsts
= _mesa_set_create(this, _mesa_hash_pointer
,
201 _mesa_key_pointer_equal
);
207 /** Available Copy to Propagate table, from variable to the entry
208 * containing the current sources that can be used. */
211 /** When a state is cloned, entries are copied on demand from fallback. */
212 copy_propagation_state
*fallback
;
217 class kill_entry
: public exec_node
220 /* override operator new from exec_node */
221 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry
)
223 kill_entry(ir_variable
*var
, int write_mask
)
226 this->write_mask
= write_mask
;
230 unsigned int write_mask
;
233 class ir_copy_propagation_elements_visitor
: public ir_rvalue_visitor
{
235 ir_copy_propagation_elements_visitor()
237 this->progress
= false;
238 this->killed_all
= false;
239 this->mem_ctx
= ralloc_context(NULL
);
240 this->lin_ctx
= linear_alloc_parent(this->mem_ctx
, 0);
241 this->shader_mem_ctx
= NULL
;
242 this->kills
= new(mem_ctx
) exec_list
;
243 this->state
= copy_propagation_state::create(mem_ctx
);
245 ~ir_copy_propagation_elements_visitor()
247 ralloc_free(mem_ctx
);
250 void handle_loop(ir_loop
*, bool keep_acp
);
251 virtual ir_visitor_status
visit_enter(class ir_loop
*);
252 virtual ir_visitor_status
visit_enter(class ir_function_signature
*);
253 virtual ir_visitor_status
visit_leave(class ir_assignment
*);
254 virtual ir_visitor_status
visit_enter(class ir_call
*);
255 virtual ir_visitor_status
visit_enter(class ir_if
*);
256 virtual ir_visitor_status
visit_leave(class ir_swizzle
*);
258 void handle_rvalue(ir_rvalue
**rvalue
);
260 void add_copy(ir_assignment
*ir
);
261 void kill(kill_entry
*k
);
262 void handle_if_block(exec_list
*instructions
, exec_list
*kills
, bool *killed_all
);
264 copy_propagation_state
*state
;
267 * List of kill_entry: The variables whose values were killed in this
276 /* Context for our local data structures. */
279 /* Context for allocating new shader nodes. */
280 void *shader_mem_ctx
;
283 } /* unnamed namespace */
286 ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature
*ir
)
288 /* Treat entry into a function signature as a completely separate
289 * block. Any instructions at global scope will be shuffled into
290 * main() at link time, so they're irrelevant to us.
292 exec_list
*orig_kills
= this->kills
;
293 bool orig_killed_all
= this->killed_all
;
295 this->kills
= new(mem_ctx
) exec_list
;
296 this->killed_all
= false;
298 copy_propagation_state
*orig_state
= state
;
299 this->state
= copy_propagation_state::create(mem_ctx
);
301 visit_list_elements(this, &ir
->body
);
304 this->state
= orig_state
;
306 ralloc_free(this->kills
);
307 this->kills
= orig_kills
;
308 this->killed_all
= orig_killed_all
;
310 return visit_continue_with_parent
;
314 ir_copy_propagation_elements_visitor::visit_leave(ir_assignment
*ir
)
316 ir_dereference_variable
*lhs
= ir
->lhs
->as_dereference_variable();
317 ir_variable
*var
= ir
->lhs
->variable_referenced();
319 if (var
->type
->is_scalar() || var
->type
->is_vector()) {
323 k
= new(this->lin_ctx
) kill_entry(var
, ir
->write_mask
);
325 k
= new(this->lin_ctx
) kill_entry(var
, ~0);
332 return visit_continue
;
336 ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle
*)
338 /* Don't visit the values of swizzles since they are handled while
339 * visiting the swizzle itself.
341 return visit_continue
;
345 * Replaces dereferences of ACP RHS variables with ACP LHS variables.
347 * This is where the actual copy propagation occurs. Note that the
348 * rewriting of ir_dereference means that the ir_dereference instance
349 * must not be shared by multiple IR operations!
352 ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue
**ir
)
355 ir_dereference_variable
*deref_var
;
356 ir_variable
*source
[4] = {NULL
, NULL
, NULL
, NULL
};
357 int source_chan
[4] = {0, 0, 0, 0};
359 bool noop_swizzle
= true;
364 ir_swizzle
*swizzle
= (*ir
)->as_swizzle();
366 deref_var
= swizzle
->val
->as_dereference_variable();
370 swizzle_chan
[0] = swizzle
->mask
.x
;
371 swizzle_chan
[1] = swizzle
->mask
.y
;
372 swizzle_chan
[2] = swizzle
->mask
.z
;
373 swizzle_chan
[3] = swizzle
->mask
.w
;
374 chans
= swizzle
->type
->vector_elements
;
376 deref_var
= (*ir
)->as_dereference_variable();
384 chans
= deref_var
->type
->vector_elements
;
387 if (this->in_assignee
)
390 ir_variable
*var
= deref_var
->var
;
392 /* Try to find ACP entries covering swizzle_chan[], hoping they're
393 * the same source variable.
396 const acp_entry
*entry
= state
->read(var
);
398 for (int c
= 0; c
< chans
; c
++) {
399 unsigned index
= swizzle_chan
[c
];
400 ir_variable
*src
= entry
->rhs_element
[index
];
404 source_chan
[c
] = entry
->rhs_channel
[index
];
405 if (source_chan
[c
] != swizzle_chan
[c
])
406 noop_swizzle
= false;
410 /* Make sure all channels are copying from the same source variable. */
413 for (int c
= 1; c
< chans
; c
++) {
414 if (source
[c
] != source
[0])
419 shader_mem_ctx
= ralloc_parent(deref_var
);
421 /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
422 * of itself, which would just be deleted by opt_noop_swizzle).
424 if (source
[0] == var
&& noop_swizzle
)
428 printf("Copy propagation from:\n");
432 deref_var
= new(shader_mem_ctx
) ir_dereference_variable(source
[0]);
433 *ir
= new(shader_mem_ctx
) ir_swizzle(deref_var
,
450 ir_copy_propagation_elements_visitor::visit_enter(ir_call
*ir
)
452 /* Do copy propagation on call parameters, but skip any out params */
453 foreach_two_lists(formal_node
, &ir
->callee
->parameters
,
454 actual_node
, &ir
->actual_parameters
) {
455 ir_variable
*sig_param
= (ir_variable
*) formal_node
;
456 ir_rvalue
*ir
= (ir_rvalue
*) actual_node
;
457 if (sig_param
->data
.mode
!= ir_var_function_out
458 && sig_param
->data
.mode
!= ir_var_function_inout
) {
463 /* Since we're unlinked, we don't (necessarily) know the side effects of
464 * this call. So kill all copies.
466 this->state
->erase_all();
467 this->killed_all
= true;
469 return visit_continue_with_parent
;
473 ir_copy_propagation_elements_visitor::handle_if_block(exec_list
*instructions
, exec_list
*kills
, bool *killed_all
)
475 exec_list
*orig_kills
= this->kills
;
476 bool orig_killed_all
= this->killed_all
;
479 this->killed_all
= false;
481 /* Populate the initial acp with a copy of the original */
482 copy_propagation_state
*orig_state
= state
;
483 this->state
= orig_state
->clone();
485 visit_list_elements(this, instructions
);
488 this->state
= orig_state
;
490 *killed_all
= this->killed_all
;
491 this->kills
= orig_kills
;
492 this->killed_all
= orig_killed_all
;
496 ir_copy_propagation_elements_visitor::visit_enter(ir_if
*ir
)
498 ir
->condition
->accept(this);
500 exec_list
*new_kills
= new(mem_ctx
) exec_list
;
501 bool then_killed_all
= false;
502 bool else_killed_all
= false;
504 handle_if_block(&ir
->then_instructions
, new_kills
, &then_killed_all
);
505 handle_if_block(&ir
->else_instructions
, new_kills
, &else_killed_all
);
507 if (then_killed_all
|| else_killed_all
) {
511 foreach_in_list_safe(kill_entry
, k
, new_kills
)
515 ralloc_free(new_kills
);
517 /* handle_if_block() already descended into the children. */
518 return visit_continue_with_parent
;
522 ir_copy_propagation_elements_visitor::handle_loop(ir_loop
*ir
, bool keep_acp
)
524 exec_list
*orig_kills
= this->kills
;
525 bool orig_killed_all
= this->killed_all
;
527 this->kills
= new(mem_ctx
) exec_list
;
528 this->killed_all
= false;
530 copy_propagation_state
*orig_state
= state
;
533 /* Populate the initial acp with a copy of the original */
534 this->state
= orig_state
->clone();
536 this->state
= copy_propagation_state::create(mem_ctx
);
539 visit_list_elements(this, &ir
->body_instructions
);
542 this->state
= orig_state
;
544 if (this->killed_all
)
545 this->state
->erase_all();
547 exec_list
*new_kills
= this->kills
;
548 this->kills
= orig_kills
;
549 this->killed_all
= this->killed_all
|| orig_killed_all
;
551 foreach_in_list_safe(kill_entry
, k
, new_kills
) {
555 ralloc_free(new_kills
);
559 ir_copy_propagation_elements_visitor::visit_enter(ir_loop
*ir
)
561 handle_loop(ir
, false);
562 handle_loop(ir
, true);
564 /* already descended into the children. */
565 return visit_continue_with_parent
;
568 /* Remove any entries currently in the ACP for this kill. */
570 ir_copy_propagation_elements_visitor::kill(kill_entry
*k
)
572 state
->erase(k
->var
, k
->write_mask
);
574 /* If we were on a list, remove ourselves before inserting */
578 this->kills
->push_tail(k
);
582 * Adds directly-copied channels between vector variables to the available
583 * copy propagation list.
586 ir_copy_propagation_elements_visitor::add_copy(ir_assignment
*ir
)
588 int orig_swizzle
[4] = {0, 1, 2, 3};
594 ir_dereference_variable
*lhs
= ir
->lhs
->as_dereference_variable();
595 if (!lhs
|| !(lhs
->type
->is_scalar() || lhs
->type
->is_vector()))
598 if (lhs
->var
->data
.mode
== ir_var_shader_storage
||
599 lhs
->var
->data
.mode
== ir_var_shader_shared
)
602 ir_dereference_variable
*rhs
= ir
->rhs
->as_dereference_variable();
604 ir_swizzle
*swiz
= ir
->rhs
->as_swizzle();
608 rhs
= swiz
->val
->as_dereference_variable();
612 orig_swizzle
[0] = swiz
->mask
.x
;
613 orig_swizzle
[1] = swiz
->mask
.y
;
614 orig_swizzle
[2] = swiz
->mask
.z
;
615 orig_swizzle
[3] = swiz
->mask
.w
;
618 if (rhs
->var
->data
.mode
== ir_var_shader_storage
||
619 rhs
->var
->data
.mode
== ir_var_shader_shared
)
622 /* Move the swizzle channels out to the positions they match in the
623 * destination. We don't want to have to rewrite the swizzle[]
624 * array every time we clear a bit of the write_mask.
627 for (int i
= 0; i
< 4; i
++) {
628 if (ir
->write_mask
& (1 << i
))
629 swizzle
[i
] = orig_swizzle
[j
++];
632 int write_mask
= ir
->write_mask
;
633 if (lhs
->var
== rhs
->var
) {
634 /* If this is a copy from the variable to itself, then we need
635 * to be sure not to include the updated channels from this
636 * instruction in the set of new source channels to be
637 * copy-propagated from.
639 for (int i
= 0; i
< 4; i
++) {
640 if (ir
->write_mask
& (1 << orig_swizzle
[i
]))
641 write_mask
&= ~(1 << i
);
645 if (lhs
->var
->data
.precise
!= rhs
->var
->data
.precise
)
648 state
->write(lhs
->var
, rhs
->var
, write_mask
, swizzle
);
652 do_copy_propagation_elements(exec_list
*instructions
)
654 ir_copy_propagation_elements_visitor v
;
656 visit_list_elements(&v
, instructions
);