glsl: fix the type of ir_constant_data::u16
[mesa.git] / src / compiler / glsl / opt_copy_propagation_elements.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file opt_copy_propagation_elements.cpp
26 *
27 * Replaces usage of recently-copied components of variables with the
28 * previous copy of the variable.
29 *
30 * This should reduce the number of MOV instructions in the generated
31 * programs and help triggering other optimizations that live in GLSL
32 * level.
33 */
34
35 #include "ir.h"
36 #include "ir_rvalue_visitor.h"
37 #include "ir_basic_block.h"
38 #include "ir_optimization.h"
39 #include "compiler/glsl_types.h"
40 #include "util/hash_table.h"
41 #include "util/set.h"
42
43 static bool debug = false;
44
45 namespace {
46
47 class acp_entry
48 {
49 public:
50 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry)
51
52 /* If set, rhs_full indicates that this ACP entry represents a
53 * whole-variable copy. The rhs_element[] array will still be filled,
54 * to allow the swizzling from its components in case the variable
55 * was a vector (and to simplify some of the erase() and write_vector()
56 * logic).
57 */
58
59 ir_variable *rhs_full;
60 ir_variable *rhs_element[4];
61 unsigned rhs_channel[4];
62
63 /* Set of variables that use the variable associated with this acp_entry as
64 * RHS. This has the "reverse references" of rhs_full/rhs_element. It is
65 * used to speed up invalidating those references when the acp_entry
66 * changes.
67 */
68 set *dsts;
69 };
70
71 class copy_propagation_state {
72 public:
73 DECLARE_RZALLOC_CXX_OPERATORS(copy_propagation_state);
74
75 static
76 copy_propagation_state* create(void *mem_ctx)
77 {
78 return new (mem_ctx) copy_propagation_state(NULL);
79 }
80
81 copy_propagation_state* clone()
82 {
83 return new (ralloc_parent(this)) copy_propagation_state(this);
84 }
85
86 void erase_all()
87 {
88 /* Individual elements were allocated from a linear allocator, so will
89 * be destroyed when the state is destroyed.
90 */
91 _mesa_hash_table_clear(acp, NULL);
92 fallback = NULL;
93 }
94
95 void erase(ir_variable *var, unsigned write_mask)
96 {
97 acp_entry *entry = pull_acp(var);
98 entry->rhs_full = NULL;
99
100 for (int i = 0; i < 4; i++) {
101 if (!entry->rhs_element[i])
102 continue;
103 if ((write_mask & (1 << i)) == 0)
104 continue;
105
106 ir_variable *to_remove = entry->rhs_element[i];
107 entry->rhs_element[i] = NULL;
108 remove_unused_var_from_dsts(entry, var, to_remove);
109 }
110
111 /* TODO: Check write mask, and possibly not clear everything. */
112
113 /* For any usage of our variable on the RHS, clear it out. */
114 set_foreach(entry->dsts, set_entry) {
115 ir_variable *dst_var = (ir_variable *)set_entry->key;
116 acp_entry *dst_entry = pull_acp(dst_var);
117 for (int i = 0; i < 4; i++) {
118 if (dst_entry->rhs_element[i] == var)
119 dst_entry->rhs_element[i] = NULL;
120 }
121 if (dst_entry->rhs_full == var)
122 dst_entry->rhs_full = NULL;
123 _mesa_set_remove(entry->dsts, set_entry);
124 }
125 }
126
127 acp_entry *read(ir_variable *var)
128 {
129 for (copy_propagation_state *s = this; s != NULL; s = s->fallback) {
130 hash_entry *ht_entry = _mesa_hash_table_search(s->acp, var);
131 if (ht_entry)
132 return (acp_entry *) ht_entry->data;
133 }
134 return NULL;
135 }
136
137 void write_elements(ir_variable *lhs, ir_variable *rhs, unsigned write_mask, int swizzle[4])
138 {
139 acp_entry *lhs_entry = pull_acp(lhs);
140 lhs_entry->rhs_full = NULL;
141
142 for (int i = 0; i < 4; i++) {
143 if ((write_mask & (1 << i)) == 0)
144 continue;
145 ir_variable *to_remove = lhs_entry->rhs_element[i];
146 lhs_entry->rhs_element[i] = rhs;
147 lhs_entry->rhs_channel[i] = swizzle[i];
148
149 remove_unused_var_from_dsts(lhs_entry, lhs, to_remove);
150 }
151
152 acp_entry *rhs_entry = pull_acp(rhs);
153 _mesa_set_add(rhs_entry->dsts, lhs);
154 }
155
156 void write_full(ir_variable *lhs, ir_variable *rhs)
157 {
158 acp_entry *lhs_entry = pull_acp(lhs);
159 if (lhs_entry->rhs_full == rhs)
160 return;
161
162 if (lhs_entry->rhs_full) {
163 remove_from_dsts(lhs_entry->rhs_full, lhs);
164 } else if (lhs->type->is_vector()) {
165 for (int i = 0; i < 4; i++) {
166 if (lhs_entry->rhs_element[i])
167 remove_from_dsts(lhs_entry->rhs_element[i], lhs);
168 }
169 }
170
171 lhs_entry->rhs_full = rhs;
172 acp_entry *rhs_entry = pull_acp(rhs);
173 _mesa_set_add(rhs_entry->dsts, lhs);
174
175 if (lhs->type->is_vector()) {
176 for (int i = 0; i < 4; i++) {
177 lhs_entry->rhs_element[i] = rhs;
178 lhs_entry->rhs_channel[i] = i;
179 }
180 }
181 }
182
183 void remove_unused_var_from_dsts(acp_entry *lhs_entry, ir_variable *lhs, ir_variable *var)
184 {
185 if (!var)
186 return;
187
188 /* If lhs still uses var, don't remove anything. */
189 for (int j = 0; j < 4; j++) {
190 if (lhs_entry->rhs_element[j] == var)
191 return;
192 }
193
194 acp_entry *element = pull_acp(var);
195 assert(element);
196 _mesa_set_remove_key(element->dsts, lhs);
197 }
198
199 private:
200 explicit copy_propagation_state(copy_propagation_state *fallback)
201 {
202 this->fallback = fallback;
203 /* Use 'this' as context for the table, no explicit destruction
204 * needed later.
205 */
206 acp = _mesa_pointer_hash_table_create(this);
207 lin_ctx = linear_alloc_parent(this, 0);
208 }
209
210 acp_entry *pull_acp(ir_variable *var)
211 {
212 hash_entry *ht_entry = _mesa_hash_table_search(acp, var);
213 if (ht_entry)
214 return (acp_entry *) ht_entry->data;
215
216 /* If not found, create one and copy data from fallback if available. */
217 acp_entry *entry = new(lin_ctx) acp_entry();
218 _mesa_hash_table_insert(acp, var, entry);
219
220 bool found = false;
221 for (copy_propagation_state *s = fallback; s != NULL; s = s->fallback) {
222 hash_entry *fallback_ht_entry = _mesa_hash_table_search(s->acp, var);
223 if (fallback_ht_entry) {
224 acp_entry *fallback_entry = (acp_entry *) fallback_ht_entry->data;
225 *entry = *fallback_entry;
226 entry->dsts = _mesa_set_clone(fallback_entry->dsts, this);
227 found = true;
228 break;
229 }
230 }
231
232 if (!found) {
233 entry->dsts = _mesa_pointer_set_create(this);
234 }
235
236 return entry;
237 }
238
239 void
240 remove_from_dsts(ir_variable *var, ir_variable *to_remove)
241 {
242 acp_entry *entry = pull_acp(var);
243 assert(entry);
244 _mesa_set_remove_key(entry->dsts, to_remove);
245 }
246
247 /** Available Copy to Propagate table, from variable to the entry
248 * containing the current sources that can be used. */
249 hash_table *acp;
250
251 /** When a state is cloned, entries are copied on demand from fallback. */
252 copy_propagation_state *fallback;
253
254 void *lin_ctx;
255 };
256
257 class kill_entry : public exec_node
258 {
259 public:
260 /* override operator new from exec_node */
261 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry)
262
263 kill_entry(ir_variable *var, int write_mask)
264 {
265 this->var = var;
266 this->write_mask = write_mask;
267 }
268
269 ir_variable *var;
270 unsigned int write_mask;
271 };
272
273 class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
274 public:
275 ir_copy_propagation_elements_visitor()
276 {
277 this->progress = false;
278 this->killed_all = false;
279 this->mem_ctx = ralloc_context(NULL);
280 this->lin_ctx = linear_alloc_parent(this->mem_ctx, 0);
281 this->shader_mem_ctx = NULL;
282 this->kills = new(mem_ctx) exec_list;
283 this->state = copy_propagation_state::create(mem_ctx);
284 }
285 ~ir_copy_propagation_elements_visitor()
286 {
287 ralloc_free(mem_ctx);
288 }
289
290 virtual ir_visitor_status visit(ir_dereference_variable *);
291
292 void handle_loop(ir_loop *, bool keep_acp);
293 virtual ir_visitor_status visit_enter(class ir_loop *);
294 virtual ir_visitor_status visit_enter(class ir_function_signature *);
295 virtual ir_visitor_status visit_leave(class ir_assignment *);
296 virtual ir_visitor_status visit_enter(class ir_call *);
297 virtual ir_visitor_status visit_enter(class ir_if *);
298 virtual ir_visitor_status visit_leave(class ir_swizzle *);
299
300 void handle_rvalue(ir_rvalue **rvalue);
301
302 void add_copy(ir_assignment *ir);
303 void kill(kill_entry *k);
304 void handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all);
305
306 copy_propagation_state *state;
307
308 /**
309 * List of kill_entry: The variables whose values were killed in this
310 * block.
311 */
312 exec_list *kills;
313
314 bool progress;
315
316 bool killed_all;
317
318 /* Context for our local data structures. */
319 void *mem_ctx;
320 void *lin_ctx;
321 /* Context for allocating new shader nodes. */
322 void *shader_mem_ctx;
323 };
324
325 } /* unnamed namespace */
326
327 ir_visitor_status
328 ir_copy_propagation_elements_visitor::visit(ir_dereference_variable *ir)
329 {
330 if (this->in_assignee)
331 return visit_continue;
332
333 const acp_entry *entry = state->read(ir->var);
334 if (entry && entry->rhs_full) {
335 ir->var = (ir_variable *) entry->rhs_full;
336 progress = true;
337 }
338
339 return visit_continue;
340 }
341
342 ir_visitor_status
343 ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
344 {
345 /* Treat entry into a function signature as a completely separate
346 * block. Any instructions at global scope will be shuffled into
347 * main() at link time, so they're irrelevant to us.
348 */
349 exec_list *orig_kills = this->kills;
350 bool orig_killed_all = this->killed_all;
351
352 this->kills = new(mem_ctx) exec_list;
353 this->killed_all = false;
354
355 copy_propagation_state *orig_state = state;
356 this->state = copy_propagation_state::create(mem_ctx);
357
358 visit_list_elements(this, &ir->body);
359
360 delete this->state;
361 this->state = orig_state;
362
363 ralloc_free(this->kills);
364 this->kills = orig_kills;
365 this->killed_all = orig_killed_all;
366
367 return visit_continue_with_parent;
368 }
369
370 ir_visitor_status
371 ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
372 {
373 ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
374 ir_variable *var = ir->lhs->variable_referenced();
375
376 kill_entry *k;
377
378 if (lhs && var->type->is_vector())
379 k = new(this->lin_ctx) kill_entry(var, ir->write_mask);
380 else
381 k = new(this->lin_ctx) kill_entry(var, ~0);
382
383 kill(k);
384
385 add_copy(ir);
386
387 return visit_continue;
388 }
389
390 ir_visitor_status
391 ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *)
392 {
393 /* Don't visit the values of swizzles since they are handled while
394 * visiting the swizzle itself.
395 */
396 return visit_continue;
397 }
398
399 /**
400 * Replaces dereferences of ACP RHS variables with ACP LHS variables.
401 *
402 * This is where the actual copy propagation occurs. Note that the
403 * rewriting of ir_dereference means that the ir_dereference instance
404 * must not be shared by multiple IR operations!
405 */
406 void
407 ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
408 {
409 int swizzle_chan[4];
410 ir_dereference_variable *deref_var;
411 ir_variable *source[4] = {NULL, NULL, NULL, NULL};
412 int source_chan[4] = {0, 0, 0, 0};
413 int chans;
414 bool noop_swizzle = true;
415
416 if (!*ir)
417 return;
418
419 ir_swizzle *swizzle = (*ir)->as_swizzle();
420 if (swizzle) {
421 deref_var = swizzle->val->as_dereference_variable();
422 if (!deref_var)
423 return;
424
425 swizzle_chan[0] = swizzle->mask.x;
426 swizzle_chan[1] = swizzle->mask.y;
427 swizzle_chan[2] = swizzle->mask.z;
428 swizzle_chan[3] = swizzle->mask.w;
429 chans = swizzle->type->vector_elements;
430 } else {
431 deref_var = (*ir)->as_dereference_variable();
432 if (!deref_var)
433 return;
434
435 swizzle_chan[0] = 0;
436 swizzle_chan[1] = 1;
437 swizzle_chan[2] = 2;
438 swizzle_chan[3] = 3;
439 chans = deref_var->type->vector_elements;
440 }
441
442 if (this->in_assignee)
443 return;
444
445 ir_variable *var = deref_var->var;
446
447 /* Try to find ACP entries covering swizzle_chan[], hoping they're
448 * the same source variable.
449 */
450
451 const acp_entry *entry = state->read(var);
452 if (entry) {
453 for (int c = 0; c < chans; c++) {
454 unsigned index = swizzle_chan[c];
455 ir_variable *src = entry->rhs_element[index];
456 if (!src)
457 continue;
458 source[c] = src;
459 source_chan[c] = entry->rhs_channel[index];
460 if (source_chan[c] != swizzle_chan[c])
461 noop_swizzle = false;
462 }
463 }
464
465 /* Make sure all channels are copying from the same source variable. */
466 if (!source[0])
467 return;
468 for (int c = 1; c < chans; c++) {
469 if (source[c] != source[0])
470 return;
471 }
472
473 if (!shader_mem_ctx)
474 shader_mem_ctx = ralloc_parent(deref_var);
475
476 /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
477 * of itself, which would just be deleted by opt_noop_swizzle).
478 */
479 if (source[0] == var && noop_swizzle)
480 return;
481
482 if (debug) {
483 printf("Copy propagation from:\n");
484 (*ir)->print();
485 }
486
487 deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
488 *ir = new(shader_mem_ctx) ir_swizzle(deref_var,
489 source_chan[0],
490 source_chan[1],
491 source_chan[2],
492 source_chan[3],
493 chans);
494 progress = true;
495
496 if (debug) {
497 printf("to:\n");
498 (*ir)->print();
499 printf("\n");
500 }
501 }
502
503
504 ir_visitor_status
505 ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
506 {
507 /* Do copy propagation on call parameters, but skip any out params */
508 foreach_two_lists(formal_node, &ir->callee->parameters,
509 actual_node, &ir->actual_parameters) {
510 ir_variable *sig_param = (ir_variable *) formal_node;
511 ir_rvalue *ir = (ir_rvalue *) actual_node;
512 if (sig_param->data.mode != ir_var_function_out
513 && sig_param->data.mode != ir_var_function_inout) {
514 ir->accept(this);
515 }
516 }
517
518 if (!ir->callee->is_intrinsic()) {
519 state->erase_all();
520 this->killed_all = true;
521 } else {
522 if (ir->return_deref) {
523 kill(new(this->lin_ctx) kill_entry(ir->return_deref->var, ~0));
524 }
525
526 foreach_two_lists(formal_node, &ir->callee->parameters,
527 actual_node, &ir->actual_parameters) {
528 ir_variable *sig_param = (ir_variable *) formal_node;
529 if (sig_param->data.mode == ir_var_function_out ||
530 sig_param->data.mode == ir_var_function_inout) {
531 ir_rvalue *ir = (ir_rvalue *) actual_node;
532 ir_variable *var = ir->variable_referenced();
533 kill(new(this->lin_ctx) kill_entry(var, ~0));
534 }
535 }
536 }
537
538 return visit_continue_with_parent;
539 }
540
541 void
542 ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all)
543 {
544 exec_list *orig_kills = this->kills;
545 bool orig_killed_all = this->killed_all;
546
547 this->kills = kills;
548 this->killed_all = false;
549
550 /* Populate the initial acp with a copy of the original */
551 copy_propagation_state *orig_state = state;
552 this->state = orig_state->clone();
553
554 visit_list_elements(this, instructions);
555
556 delete this->state;
557 this->state = orig_state;
558
559 *killed_all = this->killed_all;
560 this->kills = orig_kills;
561 this->killed_all = orig_killed_all;
562 }
563
564 ir_visitor_status
565 ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
566 {
567 ir->condition->accept(this);
568
569 exec_list *new_kills = new(mem_ctx) exec_list;
570 bool then_killed_all = false;
571 bool else_killed_all = false;
572
573 handle_if_block(&ir->then_instructions, new_kills, &then_killed_all);
574 handle_if_block(&ir->else_instructions, new_kills, &else_killed_all);
575
576 if (then_killed_all || else_killed_all) {
577 state->erase_all();
578 killed_all = true;
579 } else {
580 foreach_in_list_safe(kill_entry, k, new_kills)
581 kill(k);
582 }
583
584 ralloc_free(new_kills);
585
586 /* handle_if_block() already descended into the children. */
587 return visit_continue_with_parent;
588 }
589
590 void
591 ir_copy_propagation_elements_visitor::handle_loop(ir_loop *ir, bool keep_acp)
592 {
593 exec_list *orig_kills = this->kills;
594 bool orig_killed_all = this->killed_all;
595
596 this->kills = new(mem_ctx) exec_list;
597 this->killed_all = false;
598
599 copy_propagation_state *orig_state = state;
600
601 if (keep_acp) {
602 /* Populate the initial acp with a copy of the original */
603 this->state = orig_state->clone();
604 } else {
605 this->state = copy_propagation_state::create(mem_ctx);
606 }
607
608 visit_list_elements(this, &ir->body_instructions);
609
610 delete this->state;
611 this->state = orig_state;
612
613 if (this->killed_all)
614 this->state->erase_all();
615
616 exec_list *new_kills = this->kills;
617 this->kills = orig_kills;
618 this->killed_all = this->killed_all || orig_killed_all;
619
620 foreach_in_list_safe(kill_entry, k, new_kills) {
621 kill(k);
622 }
623
624 ralloc_free(new_kills);
625 }
626
627 ir_visitor_status
628 ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
629 {
630 handle_loop(ir, false);
631 handle_loop(ir, true);
632
633 /* already descended into the children. */
634 return visit_continue_with_parent;
635 }
636
637 /* Remove any entries currently in the ACP for this kill. */
638 void
639 ir_copy_propagation_elements_visitor::kill(kill_entry *k)
640 {
641 state->erase(k->var, k->write_mask);
642
643 /* If we were on a list, remove ourselves before inserting */
644 if (k->next)
645 k->remove();
646
647 this->kills->push_tail(k);
648 }
649
650 /**
651 * Adds directly-copied channels between vector variables to the available
652 * copy propagation list.
653 */
654 void
655 ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
656 {
657 if (ir->condition)
658 return;
659
660 {
661 ir_variable *lhs_var = ir->whole_variable_written();
662 ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
663
664 if (lhs_var != NULL && rhs && rhs->var != NULL && lhs_var != rhs->var) {
665 if (lhs_var->data.mode == ir_var_shader_storage ||
666 lhs_var->data.mode == ir_var_shader_shared ||
667 rhs->var->data.mode == ir_var_shader_storage ||
668 rhs->var->data.mode == ir_var_shader_shared ||
669 lhs_var->data.precise != rhs->var->data.precise) {
670 return;
671 }
672 state->write_full(lhs_var, rhs->var);
673 return;
674 }
675 }
676
677 int orig_swizzle[4] = {0, 1, 2, 3};
678 int swizzle[4];
679
680 ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
681 if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
682 return;
683
684 if (lhs->var->data.mode == ir_var_shader_storage ||
685 lhs->var->data.mode == ir_var_shader_shared)
686 return;
687
688 ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
689 if (!rhs) {
690 ir_swizzle *swiz = ir->rhs->as_swizzle();
691 if (!swiz)
692 return;
693
694 rhs = swiz->val->as_dereference_variable();
695 if (!rhs)
696 return;
697
698 orig_swizzle[0] = swiz->mask.x;
699 orig_swizzle[1] = swiz->mask.y;
700 orig_swizzle[2] = swiz->mask.z;
701 orig_swizzle[3] = swiz->mask.w;
702 }
703
704 if (rhs->var->data.mode == ir_var_shader_storage ||
705 rhs->var->data.mode == ir_var_shader_shared)
706 return;
707
708 /* Move the swizzle channels out to the positions they match in the
709 * destination. We don't want to have to rewrite the swizzle[]
710 * array every time we clear a bit of the write_mask.
711 */
712 int j = 0;
713 for (int i = 0; i < 4; i++) {
714 if (ir->write_mask & (1 << i))
715 swizzle[i] = orig_swizzle[j++];
716 }
717
718 int write_mask = ir->write_mask;
719 if (lhs->var == rhs->var) {
720 /* If this is a copy from the variable to itself, then we need
721 * to be sure not to include the updated channels from this
722 * instruction in the set of new source channels to be
723 * copy-propagated from.
724 */
725 for (int i = 0; i < 4; i++) {
726 if (ir->write_mask & (1 << orig_swizzle[i]))
727 write_mask &= ~(1 << i);
728 }
729 }
730
731 if (lhs->var->data.precise != rhs->var->data.precise)
732 return;
733
734 state->write_elements(lhs->var, rhs->var, write_mask, swizzle);
735 }
736
737 bool
738 do_copy_propagation_elements(exec_list *instructions)
739 {
740 ir_copy_propagation_elements_visitor v;
741
742 visit_list_elements(&v, instructions);
743
744 return v.progress;
745 }