glsl: remove delegating constructors to allow build with C++98
[mesa.git] / src / compiler / glsl / opt_copy_propagation_elements.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file opt_copy_propagation_elements.cpp
26 *
27 * Replaces usage of recently-copied components of variables with the
28 * previous copy of the variable.
29 *
30 * This pass can be compared with opt_copy_propagation, which operands
31 * on arbitrary whole-variable copies. However, in order to handle
32 * the copy propagation of swizzled variables or writemasked writes,
33 * we want to track things on a channel-wise basis. I found that
34 * trying to mix the swizzled/writemasked support here with the
35 * whole-variable stuff in opt_copy_propagation.cpp just made a mess,
36 * so this is separate despite the ACP handling being somewhat
37 * similar.
38 *
39 * This should reduce the number of MOV instructions in the generated
40 * programs unless copy propagation is also done on the LIR, and may
41 * help anyway by triggering other optimizations that live in the HIR.
42 */
43
44 #include "ir.h"
45 #include "ir_rvalue_visitor.h"
46 #include "ir_basic_block.h"
47 #include "ir_optimization.h"
48 #include "compiler/glsl_types.h"
49 #include "util/hash_table.h"
50 #include "util/set.h"
51
52 static bool debug = false;
53
54 namespace {
55
56 class acp_entry
57 {
58 public:
59 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry)
60
61 ir_variable *rhs_element[4];
62 unsigned rhs_channel[4];
63
64 set *dsts;
65 };
66
67 class copy_propagation_state {
68 public:
69 DECLARE_RZALLOC_CXX_OPERATORS(copy_propagation_state);
70
71 static
72 copy_propagation_state* create(void *mem_ctx)
73 {
74 return new (mem_ctx) copy_propagation_state(NULL);
75 }
76
77 copy_propagation_state* clone()
78 {
79 return new (ralloc_parent(this)) copy_propagation_state(this);
80 }
81
82 void erase_all()
83 {
84 /* Individual elements were allocated from a linear allocator, so will
85 * be destroyed when the state is destroyed.
86 */
87 _mesa_hash_table_clear(acp, NULL);
88 fallback = NULL;
89 }
90
91 void erase(ir_variable *var, unsigned write_mask)
92 {
93 acp_entry *entry = pull_acp(var);
94
95 for (int i = 0; i < 4; i++) {
96 if (!entry->rhs_element[i])
97 continue;
98 if ((write_mask & (1 << i)) == 0)
99 continue;
100
101 ir_variable *to_remove = entry->rhs_element[i];
102 entry->rhs_element[i] = NULL;
103 remove_unused_var_from_dsts(entry, var, to_remove);
104 }
105
106 /* TODO: Check write mask, and possibly not clear everything. */
107
108 /* For any usage of our variable on the RHS, clear it out. */
109 struct set_entry *set_entry;
110 set_foreach(entry->dsts, set_entry) {
111 ir_variable *dst_var = (ir_variable *)set_entry->key;
112 acp_entry *dst_entry = pull_acp(dst_var);
113 for (int i = 0; i < 4; i++) {
114 if (dst_entry->rhs_element[i] == var)
115 dst_entry->rhs_element[i] = NULL;
116 }
117 _mesa_set_remove(entry->dsts, set_entry);
118 }
119 }
120
121 acp_entry *read(ir_variable *var)
122 {
123 for (copy_propagation_state *s = this; s != NULL; s = s->fallback) {
124 hash_entry *ht_entry = _mesa_hash_table_search(s->acp, var);
125 if (ht_entry)
126 return (acp_entry *) ht_entry->data;
127 }
128 return NULL;
129 }
130
131 void write(ir_variable *lhs, ir_variable *rhs, unsigned write_mask, int swizzle[4])
132 {
133 acp_entry *lhs_entry = pull_acp(lhs);
134
135 for (int i = 0; i < 4; i++) {
136 if ((write_mask & (1 << i)) == 0)
137 continue;
138 ir_variable *to_remove = lhs_entry->rhs_element[i];
139 lhs_entry->rhs_element[i] = rhs;
140 lhs_entry->rhs_channel[i] = swizzle[i];
141
142 remove_unused_var_from_dsts(lhs_entry, lhs, to_remove);
143 }
144
145 acp_entry *rhs_entry = pull_acp(rhs);
146 _mesa_set_add(rhs_entry->dsts, lhs);
147 }
148
149 void remove_unused_var_from_dsts(acp_entry *lhs_entry, ir_variable *lhs, ir_variable *var)
150 {
151 if (!var)
152 return;
153
154 /* If lhs still uses var, don't remove anything. */
155 for (int j = 0; j < 4; j++) {
156 if (lhs_entry->rhs_element[j] == var)
157 return;
158 }
159
160 acp_entry *element = pull_acp(var);
161 assert(element);
162 _mesa_set_remove_key(element->dsts, lhs);
163 }
164
165 private:
166 explicit copy_propagation_state(copy_propagation_state *fallback)
167 {
168 this->fallback = fallback;
169 /* Use 'this' as context for the table, no explicit destruction
170 * needed later.
171 */
172 acp = _mesa_hash_table_create(this, _mesa_hash_pointer,
173 _mesa_key_pointer_equal);
174 lin_ctx = linear_alloc_parent(this, 0);
175 }
176
177 acp_entry *pull_acp(ir_variable *var)
178 {
179 hash_entry *ht_entry = _mesa_hash_table_search(acp, var);
180 if (ht_entry)
181 return (acp_entry *) ht_entry->data;
182
183 /* If not found, create one and copy data from fallback if available. */
184 acp_entry *entry = new(lin_ctx) acp_entry();
185 _mesa_hash_table_insert(acp, var, entry);
186
187 bool found = false;
188 for (copy_propagation_state *s = fallback; s != NULL; s = s->fallback) {
189 hash_entry *fallback_ht_entry = _mesa_hash_table_search(s->acp, var);
190 if (fallback_ht_entry) {
191 acp_entry *fallback_entry = (acp_entry *) fallback_ht_entry->data;
192 *entry = *fallback_entry;
193 entry->dsts = _mesa_set_clone(fallback_entry->dsts, this);
194 found = true;
195 break;
196 }
197 }
198
199 if (!found) {
200 entry->dsts = _mesa_set_create(this, _mesa_hash_pointer,
201 _mesa_key_pointer_equal);
202 }
203
204 return entry;
205 }
206
207 /** Available Copy to Propagate table, from variable to the entry
208 * containing the current sources that can be used. */
209 hash_table *acp;
210
211 /** When a state is cloned, entries are copied on demand from fallback. */
212 copy_propagation_state *fallback;
213
214 void *lin_ctx;
215 };
216
217 class kill_entry : public exec_node
218 {
219 public:
220 /* override operator new from exec_node */
221 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry)
222
223 kill_entry(ir_variable *var, int write_mask)
224 {
225 this->var = var;
226 this->write_mask = write_mask;
227 }
228
229 ir_variable *var;
230 unsigned int write_mask;
231 };
232
233 class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
234 public:
235 ir_copy_propagation_elements_visitor()
236 {
237 this->progress = false;
238 this->killed_all = false;
239 this->mem_ctx = ralloc_context(NULL);
240 this->lin_ctx = linear_alloc_parent(this->mem_ctx, 0);
241 this->shader_mem_ctx = NULL;
242 this->kills = new(mem_ctx) exec_list;
243 this->state = copy_propagation_state::create(mem_ctx);
244 }
245 ~ir_copy_propagation_elements_visitor()
246 {
247 ralloc_free(mem_ctx);
248 }
249
250 void handle_loop(ir_loop *, bool keep_acp);
251 virtual ir_visitor_status visit_enter(class ir_loop *);
252 virtual ir_visitor_status visit_enter(class ir_function_signature *);
253 virtual ir_visitor_status visit_leave(class ir_assignment *);
254 virtual ir_visitor_status visit_enter(class ir_call *);
255 virtual ir_visitor_status visit_enter(class ir_if *);
256 virtual ir_visitor_status visit_leave(class ir_swizzle *);
257
258 void handle_rvalue(ir_rvalue **rvalue);
259
260 void add_copy(ir_assignment *ir);
261 void kill(kill_entry *k);
262 void handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all);
263
264 copy_propagation_state *state;
265
266 /**
267 * List of kill_entry: The variables whose values were killed in this
268 * block.
269 */
270 exec_list *kills;
271
272 bool progress;
273
274 bool killed_all;
275
276 /* Context for our local data structures. */
277 void *mem_ctx;
278 void *lin_ctx;
279 /* Context for allocating new shader nodes. */
280 void *shader_mem_ctx;
281 };
282
283 } /* unnamed namespace */
284
285 ir_visitor_status
286 ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
287 {
288 /* Treat entry into a function signature as a completely separate
289 * block. Any instructions at global scope will be shuffled into
290 * main() at link time, so they're irrelevant to us.
291 */
292 exec_list *orig_kills = this->kills;
293 bool orig_killed_all = this->killed_all;
294
295 this->kills = new(mem_ctx) exec_list;
296 this->killed_all = false;
297
298 copy_propagation_state *orig_state = state;
299 this->state = copy_propagation_state::create(mem_ctx);
300
301 visit_list_elements(this, &ir->body);
302
303 delete this->state;
304 this->state = orig_state;
305
306 ralloc_free(this->kills);
307 this->kills = orig_kills;
308 this->killed_all = orig_killed_all;
309
310 return visit_continue_with_parent;
311 }
312
313 ir_visitor_status
314 ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
315 {
316 ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
317 ir_variable *var = ir->lhs->variable_referenced();
318
319 if (var->type->is_scalar() || var->type->is_vector()) {
320 kill_entry *k;
321
322 if (lhs)
323 k = new(this->lin_ctx) kill_entry(var, ir->write_mask);
324 else
325 k = new(this->lin_ctx) kill_entry(var, ~0);
326
327 kill(k);
328 }
329
330 add_copy(ir);
331
332 return visit_continue;
333 }
334
335 ir_visitor_status
336 ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *)
337 {
338 /* Don't visit the values of swizzles since they are handled while
339 * visiting the swizzle itself.
340 */
341 return visit_continue;
342 }
343
344 /**
345 * Replaces dereferences of ACP RHS variables with ACP LHS variables.
346 *
347 * This is where the actual copy propagation occurs. Note that the
348 * rewriting of ir_dereference means that the ir_dereference instance
349 * must not be shared by multiple IR operations!
350 */
351 void
352 ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
353 {
354 int swizzle_chan[4];
355 ir_dereference_variable *deref_var;
356 ir_variable *source[4] = {NULL, NULL, NULL, NULL};
357 int source_chan[4] = {0, 0, 0, 0};
358 int chans;
359 bool noop_swizzle = true;
360
361 if (!*ir)
362 return;
363
364 ir_swizzle *swizzle = (*ir)->as_swizzle();
365 if (swizzle) {
366 deref_var = swizzle->val->as_dereference_variable();
367 if (!deref_var)
368 return;
369
370 swizzle_chan[0] = swizzle->mask.x;
371 swizzle_chan[1] = swizzle->mask.y;
372 swizzle_chan[2] = swizzle->mask.z;
373 swizzle_chan[3] = swizzle->mask.w;
374 chans = swizzle->type->vector_elements;
375 } else {
376 deref_var = (*ir)->as_dereference_variable();
377 if (!deref_var)
378 return;
379
380 swizzle_chan[0] = 0;
381 swizzle_chan[1] = 1;
382 swizzle_chan[2] = 2;
383 swizzle_chan[3] = 3;
384 chans = deref_var->type->vector_elements;
385 }
386
387 if (this->in_assignee)
388 return;
389
390 ir_variable *var = deref_var->var;
391
392 /* Try to find ACP entries covering swizzle_chan[], hoping they're
393 * the same source variable.
394 */
395
396 const acp_entry *entry = state->read(var);
397 if (entry) {
398 for (int c = 0; c < chans; c++) {
399 unsigned index = swizzle_chan[c];
400 ir_variable *src = entry->rhs_element[index];
401 if (!src)
402 continue;
403 source[c] = src;
404 source_chan[c] = entry->rhs_channel[index];
405 if (source_chan[c] != swizzle_chan[c])
406 noop_swizzle = false;
407 }
408 }
409
410 /* Make sure all channels are copying from the same source variable. */
411 if (!source[0])
412 return;
413 for (int c = 1; c < chans; c++) {
414 if (source[c] != source[0])
415 return;
416 }
417
418 if (!shader_mem_ctx)
419 shader_mem_ctx = ralloc_parent(deref_var);
420
421 /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
422 * of itself, which would just be deleted by opt_noop_swizzle).
423 */
424 if (source[0] == var && noop_swizzle)
425 return;
426
427 if (debug) {
428 printf("Copy propagation from:\n");
429 (*ir)->print();
430 }
431
432 deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
433 *ir = new(shader_mem_ctx) ir_swizzle(deref_var,
434 source_chan[0],
435 source_chan[1],
436 source_chan[2],
437 source_chan[3],
438 chans);
439 progress = true;
440
441 if (debug) {
442 printf("to:\n");
443 (*ir)->print();
444 printf("\n");
445 }
446 }
447
448
449 ir_visitor_status
450 ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
451 {
452 /* Do copy propagation on call parameters, but skip any out params */
453 foreach_two_lists(formal_node, &ir->callee->parameters,
454 actual_node, &ir->actual_parameters) {
455 ir_variable *sig_param = (ir_variable *) formal_node;
456 ir_rvalue *ir = (ir_rvalue *) actual_node;
457 if (sig_param->data.mode != ir_var_function_out
458 && sig_param->data.mode != ir_var_function_inout) {
459 ir->accept(this);
460 }
461 }
462
463 /* Since we're unlinked, we don't (necessarily) know the side effects of
464 * this call. So kill all copies.
465 */
466 this->state->erase_all();
467 this->killed_all = true;
468
469 return visit_continue_with_parent;
470 }
471
472 void
473 ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all)
474 {
475 exec_list *orig_kills = this->kills;
476 bool orig_killed_all = this->killed_all;
477
478 this->kills = kills;
479 this->killed_all = false;
480
481 /* Populate the initial acp with a copy of the original */
482 copy_propagation_state *orig_state = state;
483 this->state = orig_state->clone();
484
485 visit_list_elements(this, instructions);
486
487 delete this->state;
488 this->state = orig_state;
489
490 *killed_all = this->killed_all;
491 this->kills = orig_kills;
492 this->killed_all = orig_killed_all;
493 }
494
495 ir_visitor_status
496 ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
497 {
498 ir->condition->accept(this);
499
500 exec_list *new_kills = new(mem_ctx) exec_list;
501 bool then_killed_all = false;
502 bool else_killed_all = false;
503
504 handle_if_block(&ir->then_instructions, new_kills, &then_killed_all);
505 handle_if_block(&ir->else_instructions, new_kills, &else_killed_all);
506
507 if (then_killed_all || else_killed_all) {
508 state->erase_all();
509 killed_all = true;
510 } else {
511 foreach_in_list_safe(kill_entry, k, new_kills)
512 kill(k);
513 }
514
515 ralloc_free(new_kills);
516
517 /* handle_if_block() already descended into the children. */
518 return visit_continue_with_parent;
519 }
520
521 void
522 ir_copy_propagation_elements_visitor::handle_loop(ir_loop *ir, bool keep_acp)
523 {
524 exec_list *orig_kills = this->kills;
525 bool orig_killed_all = this->killed_all;
526
527 this->kills = new(mem_ctx) exec_list;
528 this->killed_all = false;
529
530 copy_propagation_state *orig_state = state;
531
532 if (keep_acp) {
533 /* Populate the initial acp with a copy of the original */
534 this->state = orig_state->clone();
535 } else {
536 this->state = copy_propagation_state::create(mem_ctx);
537 }
538
539 visit_list_elements(this, &ir->body_instructions);
540
541 delete this->state;
542 this->state = orig_state;
543
544 if (this->killed_all)
545 this->state->erase_all();
546
547 exec_list *new_kills = this->kills;
548 this->kills = orig_kills;
549 this->killed_all = this->killed_all || orig_killed_all;
550
551 foreach_in_list_safe(kill_entry, k, new_kills) {
552 kill(k);
553 }
554
555 ralloc_free(new_kills);
556 }
557
558 ir_visitor_status
559 ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
560 {
561 handle_loop(ir, false);
562 handle_loop(ir, true);
563
564 /* already descended into the children. */
565 return visit_continue_with_parent;
566 }
567
568 /* Remove any entries currently in the ACP for this kill. */
569 void
570 ir_copy_propagation_elements_visitor::kill(kill_entry *k)
571 {
572 state->erase(k->var, k->write_mask);
573
574 /* If we were on a list, remove ourselves before inserting */
575 if (k->next)
576 k->remove();
577
578 this->kills->push_tail(k);
579 }
580
581 /**
582 * Adds directly-copied channels between vector variables to the available
583 * copy propagation list.
584 */
585 void
586 ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
587 {
588 int orig_swizzle[4] = {0, 1, 2, 3};
589 int swizzle[4];
590
591 if (ir->condition)
592 return;
593
594 ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
595 if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
596 return;
597
598 if (lhs->var->data.mode == ir_var_shader_storage ||
599 lhs->var->data.mode == ir_var_shader_shared)
600 return;
601
602 ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
603 if (!rhs) {
604 ir_swizzle *swiz = ir->rhs->as_swizzle();
605 if (!swiz)
606 return;
607
608 rhs = swiz->val->as_dereference_variable();
609 if (!rhs)
610 return;
611
612 orig_swizzle[0] = swiz->mask.x;
613 orig_swizzle[1] = swiz->mask.y;
614 orig_swizzle[2] = swiz->mask.z;
615 orig_swizzle[3] = swiz->mask.w;
616 }
617
618 if (rhs->var->data.mode == ir_var_shader_storage ||
619 rhs->var->data.mode == ir_var_shader_shared)
620 return;
621
622 /* Move the swizzle channels out to the positions they match in the
623 * destination. We don't want to have to rewrite the swizzle[]
624 * array every time we clear a bit of the write_mask.
625 */
626 int j = 0;
627 for (int i = 0; i < 4; i++) {
628 if (ir->write_mask & (1 << i))
629 swizzle[i] = orig_swizzle[j++];
630 }
631
632 int write_mask = ir->write_mask;
633 if (lhs->var == rhs->var) {
634 /* If this is a copy from the variable to itself, then we need
635 * to be sure not to include the updated channels from this
636 * instruction in the set of new source channels to be
637 * copy-propagated from.
638 */
639 for (int i = 0; i < 4; i++) {
640 if (ir->write_mask & (1 << orig_swizzle[i]))
641 write_mask &= ~(1 << i);
642 }
643 }
644
645 if (lhs->var->data.precise != rhs->var->data.precise)
646 return;
647
648 state->write(lhs->var, rhs->var, write_mask, swizzle);
649 }
650
651 bool
652 do_copy_propagation_elements(exec_list *instructions)
653 {
654 ir_copy_propagation_elements_visitor v;
655
656 visit_list_elements(&v, instructions);
657
658 return v.progress;
659 }