mesa/st: glsl_to_tgsi: add register rename mapping evaluator
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi_temprename.cpp
1 /*
2 * Copyright © 2017 Gert Wollny
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "st_glsl_to_tgsi_temprename.h"
25 #include <tgsi/tgsi_info.h>
26 #include <tgsi/tgsi_strings.h>
27 #include <program/prog_instruction.h>
28 #include <limits>
29 #include <cstdlib>
30
31 /* std::sort is significantly faster than qsort */
32 #define USE_STL_SORT
33 #ifdef USE_STL_SORT
34 #include <algorithm>
35 #endif
36
37 #ifndef NDEBUG
38 #include <iostream>
39 #include <iomanip>
40 #include <program/prog_print.h>
41 #include <util/debug.h>
42 using std::cerr;
43 using std::setw;
44 #endif
45
46 using std::numeric_limits;
47
48 /* Without c++11 define the nullptr for forward-compatibility
49 * and better readibility */
50 #if __cplusplus < 201103L
51 #define nullptr 0
52 #endif
53
54 #ifndef NDEBUG
55 /* Helper function to check whether we want to seen debugging output */
56 static inline bool is_debug_enabled ()
57 {
58 static int debug_enabled = -1;
59 if (debug_enabled < 0)
60 debug_enabled = env_var_as_boolean("GLSL_TO_TGSI_RENAME_DEBUG", false);
61 return debug_enabled > 0;
62 }
63 #define RENAME_DEBUG(X) if (is_debug_enabled()) do { X; } while (false);
64 #else
65 #define RENAME_DEBUG(X)
66 #endif
67
68 namespace {
69
70 enum prog_scope_type {
71 outer_scope, /* Outer program scope */
72 loop_body, /* Inside a loop */
73 if_branch, /* Inside if branch */
74 else_branch, /* Inside else branch */
75 switch_body, /* Inside switch statmenet */
76 switch_case_branch, /* Inside switch case statmenet */
77 switch_default_branch, /* Inside switch default statmenet */
78 undefined_scope
79 };
80
81 class prog_scope {
82 public:
83 prog_scope(prog_scope *parent, prog_scope_type type, int id,
84 int depth, int begin);
85
86 prog_scope_type type() const;
87 prog_scope *parent() const;
88 int nesting_depth() const;
89 int id() const;
90 int end() const;
91 int begin() const;
92 int loop_break_line() const;
93
94 const prog_scope *in_ifelse_scope() const;
95 const prog_scope *in_switchcase_scope() const;
96 const prog_scope *innermost_loop() const;
97 const prog_scope *outermost_loop() const;
98 const prog_scope *enclosing_conditional() const;
99
100 bool is_loop() const;
101 bool is_in_loop() const;
102 bool is_conditional() const;
103 bool is_conditional_in_loop() const;
104
105 bool break_is_for_switchcase() const;
106 bool contains_range_of(const prog_scope& other) const;
107 const st_src_reg *switch_register() const;
108
109 void set_end(int end);
110 void set_loop_break_line(int line);
111
112 private:
113 prog_scope_type scope_type;
114 int scope_id;
115 int scope_nesting_depth;
116 int scope_begin;
117 int scope_end;
118 int break_loop_line;
119 prog_scope *parent_scope;
120 const st_src_reg *switch_reg;
121 };
122
123 /* Some storage class to encapsulate the prog_scope (de-)allocations */
124 class prog_scope_storage {
125 public:
126 prog_scope_storage(void *mem_ctx, int n);
127 ~prog_scope_storage();
128 prog_scope * create(prog_scope *p, prog_scope_type type, int id,
129 int lvl, int s_begin);
130 private:
131 void *mem_ctx;
132 int current_slot;
133 prog_scope *storage;
134 };
135
136 class temp_comp_access {
137 public:
138 temp_comp_access();
139 void record_read(int line, prog_scope *scope);
140 void record_write(int line, prog_scope *scope);
141 lifetime get_required_lifetime();
142 private:
143 void propagate_lifetime_to_dominant_write_scope();
144
145 prog_scope *last_read_scope;
146 prog_scope *first_read_scope;
147 prog_scope *first_write_scope;
148 int first_write;
149 int last_read;
150 int last_write;
151 int first_read;
152 bool keep_for_full_loop;
153 };
154
155 class temp_access {
156 public:
157 temp_access();
158 void record_read(int line, prog_scope *scope, int swizzle);
159 void record_write(int line, prog_scope *scope, int writemask);
160 lifetime get_required_lifetime();
161 private:
162 void update_access_mask(int mask);
163
164 temp_comp_access comp[4];
165 int access_mask;
166 bool needs_component_tracking;
167 };
168
169 prog_scope_storage::prog_scope_storage(void *mc, int n):
170 mem_ctx(mc),
171 current_slot(0)
172 {
173 storage = ralloc_array(mem_ctx, prog_scope, n);
174 }
175
176 prog_scope_storage::~prog_scope_storage()
177 {
178 ralloc_free(storage);
179 }
180
181 prog_scope*
182 prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id,
183 int lvl, int s_begin)
184 {
185 storage[current_slot] = prog_scope(p, type, id, lvl, s_begin);
186 return &storage[current_slot++];
187 }
188
189 prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id,
190 int depth, int scope_begin):
191 scope_type(type),
192 scope_id(id),
193 scope_nesting_depth(depth),
194 scope_begin(scope_begin),
195 scope_end(-1),
196 break_loop_line(numeric_limits<int>::max()),
197 parent_scope(parent),
198 switch_reg(nullptr)
199 {
200 }
201
202 prog_scope_type prog_scope::type() const
203 {
204 return scope_type;
205 }
206
207 prog_scope *prog_scope::parent() const
208 {
209 return parent_scope;
210 }
211
212 int prog_scope::nesting_depth() const
213 {
214 return scope_nesting_depth;
215 }
216
217 bool prog_scope::is_loop() const
218 {
219 return (scope_type == loop_body);
220 }
221
222 bool prog_scope::is_in_loop() const
223 {
224 if (scope_type == loop_body)
225 return true;
226
227 if (parent_scope)
228 return parent_scope->is_in_loop();
229
230 return false;
231 }
232
233 bool prog_scope::is_conditional_in_loop() const
234 {
235 return is_conditional() && is_in_loop();
236 }
237
238 const prog_scope *prog_scope::innermost_loop() const
239 {
240 if (scope_type == loop_body)
241 return this;
242
243 if (parent_scope)
244 return parent_scope->innermost_loop();
245
246 return nullptr;
247 }
248
249 const prog_scope *prog_scope::outermost_loop() const
250 {
251 const prog_scope *loop = nullptr;
252 const prog_scope *p = this;
253
254 do {
255 if (p->type() == loop_body)
256 loop = p;
257 p = p->parent();
258 } while (p);
259
260 return loop;
261 }
262
263 const prog_scope *prog_scope::enclosing_conditional() const
264 {
265 if (is_conditional())
266 return this;
267
268 if (parent_scope)
269 return parent_scope->enclosing_conditional();
270
271 return nullptr;
272 }
273
274 bool prog_scope::contains_range_of(const prog_scope& other) const
275 {
276 return (begin() <= other.begin()) && (end() >= other.end());
277 }
278
279 bool prog_scope::is_conditional() const
280 {
281 return scope_type == if_branch ||
282 scope_type == else_branch ||
283 scope_type == switch_case_branch ||
284 scope_type == switch_default_branch;
285 }
286
287 const prog_scope *prog_scope::in_ifelse_scope() const
288 {
289 if (scope_type == if_branch ||
290 scope_type == else_branch)
291 return this;
292
293 if (parent_scope)
294 return parent_scope->in_ifelse_scope();
295
296 return nullptr;
297 }
298
299 const st_src_reg *prog_scope::switch_register() const
300 {
301 return switch_reg;
302 }
303
304 const prog_scope *prog_scope::in_switchcase_scope() const
305 {
306 if (scope_type == switch_case_branch ||
307 scope_type == switch_default_branch)
308 return this;
309
310 if (parent_scope)
311 return parent_scope->in_switchcase_scope();
312
313 return nullptr;
314 }
315
316 bool prog_scope::break_is_for_switchcase() const
317 {
318 if (scope_type == loop_body)
319 return false;
320
321 if (scope_type == switch_case_branch ||
322 scope_type == switch_default_branch ||
323 scope_type == switch_body)
324 return true;
325
326 if (parent_scope)
327 return parent_scope->break_is_for_switchcase();
328
329 return false;
330 }
331
332 int prog_scope::id() const
333 {
334 return scope_id;
335 }
336
337 int prog_scope::begin() const
338 {
339 return scope_begin;
340 }
341
342 int prog_scope::end() const
343 {
344 return scope_end;
345 }
346
347 void prog_scope::set_end(int end)
348 {
349 if (scope_end == -1)
350 scope_end = end;
351 }
352
353 void prog_scope::set_loop_break_line(int line)
354 {
355 if (scope_type == loop_body) {
356 break_loop_line = MIN2(break_loop_line, line);
357 } else {
358 if (parent_scope)
359 parent()->set_loop_break_line(line);
360 }
361 }
362
363 int prog_scope::loop_break_line() const
364 {
365 return break_loop_line;
366 }
367
368 temp_access::temp_access():
369 access_mask(0),
370 needs_component_tracking(false)
371 {
372 }
373
374 void temp_access::update_access_mask(int mask)
375 {
376 if (access_mask && access_mask != mask)
377 needs_component_tracking = true;
378 access_mask |= mask;
379 }
380
381 void temp_access::record_write(int line, prog_scope *scope, int writemask)
382 {
383 update_access_mask(writemask);
384
385 if (writemask & WRITEMASK_X)
386 comp[0].record_write(line, scope);
387 if (writemask & WRITEMASK_Y)
388 comp[1].record_write(line, scope);
389 if (writemask & WRITEMASK_Z)
390 comp[2].record_write(line, scope);
391 if (writemask & WRITEMASK_W)
392 comp[3].record_write(line, scope);
393 }
394
395 void temp_access::record_read(int line, prog_scope *scope, int swizzle)
396 {
397 int readmask = 0;
398 for (int idx = 0; idx < 4; ++idx) {
399 int swz = GET_SWZ(swizzle, idx);
400 readmask |= (1 << swz) & 0xF;
401 }
402 update_access_mask(readmask);
403
404 if (readmask & WRITEMASK_X)
405 comp[0].record_read(line, scope);
406 if (readmask & WRITEMASK_Y)
407 comp[1].record_read(line, scope);
408 if (readmask & WRITEMASK_Z)
409 comp[2].record_read(line, scope);
410 if (readmask & WRITEMASK_W)
411 comp[3].record_read(line, scope);
412 }
413
414 inline static lifetime make_lifetime(int b, int e)
415 {
416 lifetime lt;
417 lt.begin = b;
418 lt.end = e;
419 return lt;
420 }
421
422 lifetime temp_access::get_required_lifetime()
423 {
424 lifetime result = make_lifetime(-1, -1);
425
426 unsigned mask = access_mask;
427 while (mask) {
428 unsigned chan = u_bit_scan(&mask);
429 lifetime lt = comp[chan].get_required_lifetime();
430
431 if (lt.begin >= 0) {
432 if ((result.begin < 0) || (result.begin > lt.begin))
433 result.begin = lt.begin;
434 }
435
436 if (lt.end > result.end)
437 result.end = lt.end;
438
439 if (!needs_component_tracking)
440 break;
441 }
442 return result;
443 }
444
445 temp_comp_access::temp_comp_access():
446 last_read_scope(nullptr),
447 first_read_scope(nullptr),
448 first_write_scope(nullptr),
449 first_write(-1),
450 last_read(-1),
451 last_write(-1),
452 first_read(numeric_limits<int>::max())
453 {
454 }
455
456 void temp_comp_access::record_read(int line, prog_scope *scope)
457 {
458 last_read_scope = scope;
459 last_read = line;
460
461 if (first_read > line) {
462 first_read = line;
463 first_read_scope = scope;
464 }
465 }
466
467 void temp_comp_access::record_write(int line, prog_scope *scope)
468 {
469 last_write = line;
470
471 if (first_write < 0) {
472 first_write = line;
473 first_write_scope = scope;
474 }
475 }
476
477 void temp_comp_access::propagate_lifetime_to_dominant_write_scope()
478 {
479 first_write = first_write_scope->begin();
480 int lr = first_write_scope->end();
481
482 if (last_read < lr)
483 last_read = lr;
484 }
485
486 lifetime temp_comp_access::get_required_lifetime()
487 {
488 bool keep_for_full_loop = false;
489
490 /* This register component is not used at all, or only read,
491 * mark it as unused and ignore it when renaming.
492 * glsl_to_tgsi_visitor::renumber_registers will take care of
493 * eliminating registers that are not written to.
494 */
495 if (last_write < 0)
496 return make_lifetime(-1, -1);
497
498 assert(first_write_scope);
499
500 /* Only written to, just make sure the register component is not
501 * reused in the range it is used to write to
502 */
503 if (!last_read_scope)
504 return make_lifetime(first_write, last_write + 1);
505
506 const prog_scope *enclosing_scope_first_read = first_read_scope;
507 const prog_scope *enclosing_scope_first_write = first_write_scope;
508
509 /* We read before writing in a loop
510 * hence the value must survive the loops
511 */
512 if ((first_read <= first_write) &&
513 first_read_scope->is_in_loop()) {
514 keep_for_full_loop = true;
515 enclosing_scope_first_read = first_read_scope->outermost_loop();
516 }
517
518 /* A conditional write within a nested loop must survive
519 * the outermost loop, but only if it is read outside
520 * the condition scope where we write.
521 */
522 const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional();
523 if (conditional && conditional->is_in_loop() &&
524 !conditional->contains_range_of(*last_read_scope)) {
525 keep_for_full_loop = true;
526 enclosing_scope_first_write = conditional->outermost_loop();
527 }
528
529 /* Evaluate the scope that is shared by all: required first write scope,
530 * required first read before write scope, and last read scope.
531 */
532 const prog_scope *enclosing_scope = enclosing_scope_first_read;
533 if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
534 enclosing_scope = enclosing_scope_first_write;
535
536 if (enclosing_scope_first_read->contains_range_of(*enclosing_scope))
537 enclosing_scope = enclosing_scope_first_read;
538
539 while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
540 !enclosing_scope->contains_range_of(*last_read_scope)) {
541 enclosing_scope = enclosing_scope->parent();
542 assert(enclosing_scope);
543 }
544
545 /* Propagate the last read scope to the target scope */
546 while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
547 /* If the read is in a loop and we have to move up the scope we need to
548 * extend the life time to the end of this current loop because at this
549 * point we don't know whether the component was written before
550 * un-conditionally in the same loop.
551 */
552 if (last_read_scope->is_loop())
553 last_read = last_read_scope->end();
554
555 last_read_scope = last_read_scope->parent();
556 }
557
558 /* If the variable has to be kept for the whole loop, and we
559 * are currently in a loop, then propagate the life time.
560 */
561 if (keep_for_full_loop && first_write_scope->is_loop())
562 propagate_lifetime_to_dominant_write_scope();
563
564 /* Propagate the first_dominant_write scope to the target scope */
565 while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
566 /* Propagate lifetime if there was a break in a loop and the write was
567 * after the break inside that loop. Note, that this is only needed if
568 * we move up in the scopes.
569 */
570 if (first_write_scope->loop_break_line() < first_write) {
571 keep_for_full_loop = true;
572 propagate_lifetime_to_dominant_write_scope();
573 }
574
575 first_write_scope = first_write_scope->parent();
576
577 /* Propagte lifetime if we are now in a loop */
578 if (keep_for_full_loop && first_write_scope->is_loop())
579 propagate_lifetime_to_dominant_write_scope();
580 }
581
582 /* The last write past the last read is dead code, but we have to
583 * ensure that the component is not reused too early, hence extend the
584 * lifetime past the last write.
585 */
586 if (last_write >= last_read)
587 last_read = last_write + 1;
588
589 /* Here we are at the same scope, all is resolved */
590 return make_lifetime(first_write, last_read);
591 }
592
593 /* Helper class for sorting and searching the registers based
594 * on life times. */
595 struct access_record {
596 int begin;
597 int end;
598 int reg;
599 bool erase;
600
601 bool operator < (const access_record& rhs) const {
602 return begin < rhs.begin;
603 }
604 };
605
606 }
607
608 #ifndef NDEBUG
609 /* Function used for debugging. */
610 static void dump_instruction(int line, prog_scope *scope,
611 const glsl_to_tgsi_instruction& inst);
612 #endif
613
614 /* Scan the program and estimate the required register life times.
615 * The array lifetimes must be pre-allocated
616 */
617 bool
618 get_temp_registers_required_lifetimes(void *mem_ctx, exec_list *instructions,
619 int ntemps, struct lifetime *lifetimes)
620 {
621 int line = 0;
622 int loop_id = 0;
623 int if_id = 0;
624 int switch_id = 0;
625 bool is_at_end = false;
626 int n_scopes = 1;
627
628 /* Count scopes to allocate the needed space without the need for
629 * re-allocation
630 */
631 foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
632 if (inst->op == TGSI_OPCODE_BGNLOOP ||
633 inst->op == TGSI_OPCODE_SWITCH ||
634 inst->op == TGSI_OPCODE_CASE ||
635 inst->op == TGSI_OPCODE_IF ||
636 inst->op == TGSI_OPCODE_UIF ||
637 inst->op == TGSI_OPCODE_ELSE ||
638 inst->op == TGSI_OPCODE_DEFAULT)
639 ++n_scopes;
640 }
641
642 prog_scope_storage scopes(mem_ctx, n_scopes);
643 temp_access *acc = new temp_access[ntemps];
644
645 prog_scope *cur_scope = scopes.create(nullptr, outer_scope, 0, 0, line);
646
647 RENAME_DEBUG(cerr << "========= Begin shader ============\n");
648
649 foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
650 if (is_at_end) {
651 assert(!"GLSL_TO_TGSI: shader has instructions past end marker");
652 break;
653 }
654
655 RENAME_DEBUG(dump_instruction(line, cur_scope, *inst));
656
657 switch (inst->op) {
658 case TGSI_OPCODE_BGNLOOP: {
659 cur_scope = scopes.create(cur_scope, loop_body, loop_id++,
660 cur_scope->nesting_depth() + 1, line);
661 break;
662 }
663 case TGSI_OPCODE_ENDLOOP: {
664 cur_scope->set_end(line);
665 cur_scope = cur_scope->parent();
666 assert(cur_scope);
667 break;
668 }
669 case TGSI_OPCODE_IF:
670 case TGSI_OPCODE_UIF: {
671 assert(num_inst_src_regs(inst) == 1);
672 const st_src_reg& src = inst->src[0];
673 if (src.file == PROGRAM_TEMPORARY)
674 acc[src.index].record_read(line, cur_scope, src.swizzle);
675 cur_scope = scopes.create(cur_scope, if_branch, if_id++,
676 cur_scope->nesting_depth() + 1, line + 1);
677 break;
678 }
679 case TGSI_OPCODE_ELSE: {
680 assert(cur_scope->type() == if_branch);
681 cur_scope->set_end(line - 1);
682 cur_scope = scopes.create(cur_scope->parent(), else_branch,
683 cur_scope->id(), cur_scope->nesting_depth(),
684 line + 1);
685 break;
686 }
687 case TGSI_OPCODE_END: {
688 cur_scope->set_end(line);
689 is_at_end = true;
690 break;
691 }
692 case TGSI_OPCODE_ENDIF: {
693 cur_scope->set_end(line - 1);
694 cur_scope = cur_scope->parent();
695 assert(cur_scope);
696 break;
697 }
698 case TGSI_OPCODE_SWITCH: {
699 assert(num_inst_src_regs(inst) == 1);
700 const st_src_reg& src = inst->src[0];
701 prog_scope *scope = scopes.create(cur_scope, switch_body, switch_id++,
702 cur_scope->nesting_depth() + 1, line);
703 /* We record the read only for the SWITCH statement itself, like it
704 * is used by the only consumer of TGSI_OPCODE_SWITCH in tgsi_exec.c.
705 */
706 if (src.file == PROGRAM_TEMPORARY)
707 acc[src.index].record_read(line, cur_scope, src.swizzle);
708 cur_scope = scope;
709 break;
710 }
711 case TGSI_OPCODE_ENDSWITCH: {
712 cur_scope->set_end(line - 1);
713 /* Remove the case level, it might not have been
714 * closed with a break.
715 */
716 if (cur_scope->type() != switch_body)
717 cur_scope = cur_scope->parent();
718
719 cur_scope = cur_scope->parent();
720 assert(cur_scope);
721 break;
722 }
723 case TGSI_OPCODE_CASE: {
724 /* Take care of tracking the registers. */
725 prog_scope *switch_scope = cur_scope->type() == switch_body ?
726 cur_scope : cur_scope->parent();
727
728 assert(num_inst_src_regs(inst) == 1);
729 const st_src_reg& src = inst->src[0];
730 if (src.file == PROGRAM_TEMPORARY)
731 acc[src.index].record_read(line, switch_scope, src.swizzle);
732
733 /* Fall through to allocate the scope. */
734 }
735 case TGSI_OPCODE_DEFAULT: {
736 prog_scope_type t = inst->op == TGSI_OPCODE_CASE ? switch_case_branch
737 : switch_default_branch;
738 prog_scope *switch_scope = (cur_scope->type() == switch_body) ?
739 cur_scope : cur_scope->parent();
740 assert(switch_scope->type() == switch_body);
741 prog_scope *scope = scopes.create(switch_scope, t,
742 switch_scope->id(),
743 switch_scope->nesting_depth() + 1,
744 line);
745 /* Previous case falls through, so scope was not yet closed. */
746 if ((cur_scope != switch_scope) && (cur_scope->end() == -1))
747 cur_scope->set_end(line - 1);
748 cur_scope = scope;
749 break;
750 }
751 case TGSI_OPCODE_BRK: {
752 if (cur_scope->break_is_for_switchcase()) {
753 cur_scope->set_end(line - 1);
754 } else {
755 cur_scope->set_loop_break_line(line);
756 }
757 break;
758 }
759 case TGSI_OPCODE_CAL:
760 case TGSI_OPCODE_RET:
761 /* These opcodes are not supported and if a subroutine would
762 * be called in a shader, then the lifetime tracking would have
763 * to follow that call to see which registers are used there.
764 * Since this is not done, we have to bail out here and signal
765 * that no register merge will take place.
766 */
767 return false;
768 default: {
769 for (unsigned j = 0; j < num_inst_src_regs(inst); j++) {
770 const st_src_reg& src = inst->src[j];
771 if (src.file == PROGRAM_TEMPORARY)
772 acc[src.index].record_read(line, cur_scope, src.swizzle);
773 }
774 for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) {
775 const st_src_reg& src = inst->tex_offsets[j];
776 if (src.file == PROGRAM_TEMPORARY)
777 acc[src.index].record_read(line, cur_scope, src.swizzle);
778 }
779 for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
780 const st_dst_reg& dst = inst->dst[j];
781 if (dst.file == PROGRAM_TEMPORARY)
782 acc[dst.index].record_write(line, cur_scope, dst.writemask);
783 }
784 }
785 }
786 ++line;
787 }
788
789 RENAME_DEBUG(cerr << "==================================\n\n");
790
791 /* Make sure last scope is closed, even though no
792 * TGSI_OPCODE_END was given.
793 */
794 if (cur_scope->end() < 0)
795 cur_scope->set_end(line - 1);
796
797 RENAME_DEBUG(cerr << "========= lifetimes ==============\n");
798 for(int i = 0; i < ntemps; ++i) {
799 RENAME_DEBUG(cerr << setw(4) << i);
800 lifetimes[i] = acc[i].get_required_lifetime();
801 RENAME_DEBUG(cerr << ": [" << lifetimes[i].begin << ", "
802 << lifetimes[i].end << "]\n");
803 }
804 RENAME_DEBUG(cerr << "==================================\n\n");
805
806 delete[] acc;
807 return true;
808 }
809
810 /* Find the next register between [start, end) that has a life time starting
811 * at or after bound by using a binary search.
812 * start points at the beginning of the search range,
813 * end points at the element past the end of the search range, and
814 * the array comprising [start, end) must be sorted in ascending order.
815 */
816 static access_record*
817 find_next_rename(access_record* start, access_record* end, int bound)
818 {
819 int delta = (end - start);
820
821 while (delta > 0) {
822 int half = delta >> 1;
823 access_record* middle = start + half;
824
825 if (bound <= middle->begin) {
826 delta = half;
827 } else {
828 start = middle;
829 ++start;
830 delta -= half + 1;
831 }
832 }
833
834 return start;
835 }
836
837 #ifndef USE_STL_SORT
838 static int access_record_compare (const void *a, const void *b) {
839 const access_record *aa = static_cast<const access_record*>(a);
840 const access_record *bb = static_cast<const access_record*>(b);
841 return aa->begin < bb->begin ? -1 : (aa->begin > bb->begin ? 1 : 0);
842 }
843 #endif
844
845 /* This functions evaluates the register merges by using a binary
846 * search to find suitable merge candidates. */
847 void get_temp_registers_remapping(void *mem_ctx, int ntemps,
848 const struct lifetime* lifetimes,
849 struct rename_reg_pair *result)
850 {
851 access_record *reg_access = ralloc_array(mem_ctx, access_record, ntemps);
852
853 int used_temps = 0;
854 for (int i = 0; i < ntemps; ++i) {
855 if (lifetimes[i].begin >= 0) {
856 reg_access[used_temps].begin = lifetimes[i].begin;
857 reg_access[used_temps].end = lifetimes[i].end;
858 reg_access[used_temps].reg = i;
859 reg_access[used_temps].erase = false;
860 ++used_temps;
861 }
862 }
863
864 #ifdef USE_STL_SORT
865 std::sort(reg_access, reg_access + used_temps);
866 #else
867 std::qsort(reg_access, used_temps, sizeof(access_record), access_record_compare);
868 #endif
869
870 access_record *trgt = reg_access;
871 access_record *reg_access_end = reg_access + used_temps;
872 access_record *first_erase = reg_access_end;
873 access_record *search_start = trgt + 1;
874
875 while (trgt != reg_access_end) {
876 access_record *src = find_next_rename(search_start, reg_access_end,
877 trgt->end);
878 if (src != reg_access_end) {
879 result[src->reg].new_reg = trgt->reg;
880 result[src->reg].valid = true;
881 trgt->end = src->end;
882
883 /* Since we only search forward, don't remove the renamed
884 * register just now, only mark it. */
885 src->erase = true;
886
887 if (first_erase == reg_access_end)
888 first_erase = src;
889
890 search_start = src + 1;
891 } else {
892 /* Moving to the next target register it is time to remove
893 * the already merged registers from the search range */
894 if (first_erase != reg_access_end) {
895 access_record *outp = first_erase;
896 access_record *inp = first_erase + 1;
897
898 while (inp != reg_access_end) {
899 if (!inp->erase)
900 *outp++ = *inp;
901 ++inp;
902 }
903
904 reg_access_end = outp;
905 first_erase = reg_access_end;
906 }
907 ++trgt;
908 search_start = trgt + 1;
909 }
910 }
911 ralloc_free(reg_access);
912 }
913
914 /* Code below used for debugging */
915 #ifndef NDEBUG
916 static const char swizzle_txt[] = "xyzw";
917
918 static const char *tgsi_file_names[PROGRAM_FILE_MAX] = {
919 "TEMP", "ARRAY", "IN", "OUT", "STATE", "CONST",
920 "UNIFORM", "WO", "ADDR", "SAMPLER", "SV", "UNDEF",
921 "IMM", "BUF", "MEM", "IMAGE"
922 };
923
924 static
925 void dump_instruction(int line, prog_scope *scope,
926 const glsl_to_tgsi_instruction& inst)
927 {
928 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst.op);
929
930 int indent = scope->nesting_depth();
931 if ((scope->type() == switch_case_branch ||
932 scope->type() == switch_default_branch) &&
933 (info->opcode == TGSI_OPCODE_CASE ||
934 info->opcode == TGSI_OPCODE_DEFAULT))
935 --indent;
936
937 if (info->opcode == TGSI_OPCODE_ENDIF ||
938 info->opcode == TGSI_OPCODE_ELSE ||
939 info->opcode == TGSI_OPCODE_ENDLOOP ||
940 info->opcode == TGSI_OPCODE_ENDSWITCH)
941 --indent;
942
943 cerr << setw(4) << line << ": ";
944 for (int i = 0; i < indent; ++i)
945 cerr << " ";
946 cerr << tgsi_get_opcode_name(info->opcode) << " ";
947
948 bool has_operators = false;
949 for (unsigned j = 0; j < num_inst_dst_regs(&inst); j++) {
950 has_operators = true;
951 if (j > 0)
952 cerr << ", ";
953
954 const st_dst_reg& dst = inst.dst[j];
955 cerr << tgsi_file_names[dst.file];
956
957 if (dst.file == PROGRAM_ARRAY)
958 cerr << "(" << dst.array_id << ")";
959
960 cerr << "[" << dst.index << "]";
961
962 if (dst.writemask != TGSI_WRITEMASK_XYZW) {
963 cerr << ".";
964 if (dst.writemask & TGSI_WRITEMASK_X) cerr << "x";
965 if (dst.writemask & TGSI_WRITEMASK_Y) cerr << "y";
966 if (dst.writemask & TGSI_WRITEMASK_Z) cerr << "z";
967 if (dst.writemask & TGSI_WRITEMASK_W) cerr << "w";
968 }
969 }
970 if (has_operators)
971 cerr << " := ";
972
973 for (unsigned j = 0; j < num_inst_src_regs(&inst); j++) {
974 if (j > 0)
975 cerr << ", ";
976
977 const st_src_reg& src = inst.src[j];
978 cerr << tgsi_file_names[src.file]
979 << "[" << src.index << "]";
980 if (src.swizzle != SWIZZLE_XYZW) {
981 cerr << ".";
982 for (int idx = 0; idx < 4; ++idx) {
983 int swz = GET_SWZ(src.swizzle, idx);
984 if (swz < 4) {
985 cerr << swizzle_txt[swz];
986 }
987 }
988 }
989 }
990
991 if (inst.tex_offset_num_offset > 0) {
992 cerr << ", TEXOFS: ";
993 for (unsigned j = 0; j < inst.tex_offset_num_offset; j++) {
994 if (j > 0)
995 cerr << ", ";
996
997 const st_src_reg& src = inst.tex_offsets[j];
998 cerr << tgsi_file_names[src.file]
999 << "[" << src.index << "]";
1000 if (src.swizzle != SWIZZLE_XYZW) {
1001 cerr << ".";
1002 for (int idx = 0; idx < 4; ++idx) {
1003 int swz = GET_SWZ(src.swizzle, idx);
1004 if (swz < 4) {
1005 cerr << swizzle_txt[swz];
1006 }
1007 }
1008 }
1009 }
1010 }
1011 cerr << "\n";
1012 }
1013 #endif