st/glsl_to_tgsi: be precise about merging scopes
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi_temprename.cpp
1 /*
2 * Copyright © 2017 Gert Wollny
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "st_glsl_to_tgsi_temprename.h"
25 #include <tgsi/tgsi_info.h>
26 #include <tgsi/tgsi_strings.h>
27 #include <program/prog_instruction.h>
28 #include <limits>
29 #include <cstdlib>
30
31 /* std::sort is significantly faster than qsort */
32 #define USE_STL_SORT
33 #ifdef USE_STL_SORT
34 #include <algorithm>
35 #endif
36
37 #ifndef NDEBUG
38 #include <iostream>
39 #include <iomanip>
40 #include <program/prog_print.h>
41 #include <util/debug.h>
42 using std::cerr;
43 using std::setw;
44 #endif
45
46 /* If <windows.h> is included this is defined and clashes with
47 * std::numeric_limits<>::max()
48 */
49 #ifdef max
50 #undef max
51 #endif
52
53 using std::numeric_limits;
54
55 /* Without c++11 define the nullptr for forward-compatibility
56 * and better readibility */
57 #if __cplusplus < 201103L
58 #define nullptr 0
59 #endif
60
61 #ifndef NDEBUG
62 /* Helper function to check whether we want to seen debugging output */
63 static inline bool is_debug_enabled ()
64 {
65 static int debug_enabled = -1;
66 if (debug_enabled < 0)
67 debug_enabled = env_var_as_boolean("GLSL_TO_TGSI_RENAME_DEBUG", false);
68 return debug_enabled > 0;
69 }
70 #define RENAME_DEBUG(X) if (is_debug_enabled()) do { X; } while (false);
71 #else
72 #define RENAME_DEBUG(X)
73 #endif
74
75 namespace {
76
77 enum prog_scope_type {
78 outer_scope, /* Outer program scope */
79 loop_body, /* Inside a loop */
80 if_branch, /* Inside if branch */
81 else_branch, /* Inside else branch */
82 switch_body, /* Inside switch statmenet */
83 switch_case_branch, /* Inside switch case statmenet */
84 switch_default_branch, /* Inside switch default statmenet */
85 undefined_scope
86 };
87
88 class prog_scope {
89 public:
90 prog_scope(prog_scope *parent, prog_scope_type type, int id,
91 int depth, int begin);
92
93 prog_scope_type type() const;
94 prog_scope *parent() const;
95 int nesting_depth() const;
96 int id() const;
97 int end() const;
98 int begin() const;
99 int loop_break_line() const;
100
101 const prog_scope *in_ifelse_scope() const;
102 const prog_scope *in_switchcase_scope() const;
103 const prog_scope *innermost_loop() const;
104 const prog_scope *outermost_loop() const;
105 const prog_scope *enclosing_conditional() const;
106
107 bool is_loop() const;
108 bool is_in_loop() const;
109 bool is_conditional() const;
110 bool is_conditional_in_loop() const;
111
112 bool break_is_for_switchcase() const;
113 bool contains_range_of(const prog_scope& other) const;
114 const st_src_reg *switch_register() const;
115
116 void set_end(int end);
117 void set_loop_break_line(int line);
118
119 private:
120 prog_scope_type scope_type;
121 int scope_id;
122 int scope_nesting_depth;
123 int scope_begin;
124 int scope_end;
125 int break_loop_line;
126 prog_scope *parent_scope;
127 const st_src_reg *switch_reg;
128 };
129
130 /* Some storage class to encapsulate the prog_scope (de-)allocations */
131 class prog_scope_storage {
132 public:
133 prog_scope_storage(void *mem_ctx, int n);
134 ~prog_scope_storage();
135 prog_scope * create(prog_scope *p, prog_scope_type type, int id,
136 int lvl, int s_begin);
137 private:
138 void *mem_ctx;
139 int current_slot;
140 prog_scope *storage;
141 };
142
143 class temp_comp_access {
144 public:
145 temp_comp_access();
146 void record_read(int line, prog_scope *scope);
147 void record_write(int line, prog_scope *scope);
148 lifetime get_required_lifetime();
149 private:
150 void propagate_lifetime_to_dominant_write_scope();
151
152 prog_scope *last_read_scope;
153 prog_scope *first_read_scope;
154 prog_scope *first_write_scope;
155 int first_write;
156 int last_read;
157 int last_write;
158 int first_read;
159 bool keep_for_full_loop;
160 };
161
162 class temp_access {
163 public:
164 temp_access();
165 void record_read(int line, prog_scope *scope, int swizzle);
166 void record_write(int line, prog_scope *scope, int writemask);
167 lifetime get_required_lifetime();
168 private:
169 void update_access_mask(int mask);
170
171 temp_comp_access comp[4];
172 int access_mask;
173 bool needs_component_tracking;
174 };
175
176 prog_scope_storage::prog_scope_storage(void *mc, int n):
177 mem_ctx(mc),
178 current_slot(0)
179 {
180 storage = ralloc_array(mem_ctx, prog_scope, n);
181 }
182
183 prog_scope_storage::~prog_scope_storage()
184 {
185 ralloc_free(storage);
186 }
187
188 prog_scope*
189 prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id,
190 int lvl, int s_begin)
191 {
192 storage[current_slot] = prog_scope(p, type, id, lvl, s_begin);
193 return &storage[current_slot++];
194 }
195
196 prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id,
197 int depth, int scope_begin):
198 scope_type(type),
199 scope_id(id),
200 scope_nesting_depth(depth),
201 scope_begin(scope_begin),
202 scope_end(-1),
203 break_loop_line(numeric_limits<int>::max()),
204 parent_scope(parent),
205 switch_reg(nullptr)
206 {
207 }
208
209 prog_scope_type prog_scope::type() const
210 {
211 return scope_type;
212 }
213
214 prog_scope *prog_scope::parent() const
215 {
216 return parent_scope;
217 }
218
219 int prog_scope::nesting_depth() const
220 {
221 return scope_nesting_depth;
222 }
223
224 bool prog_scope::is_loop() const
225 {
226 return (scope_type == loop_body);
227 }
228
229 bool prog_scope::is_in_loop() const
230 {
231 if (scope_type == loop_body)
232 return true;
233
234 if (parent_scope)
235 return parent_scope->is_in_loop();
236
237 return false;
238 }
239
240 bool prog_scope::is_conditional_in_loop() const
241 {
242 return is_conditional() && is_in_loop();
243 }
244
245 const prog_scope *prog_scope::innermost_loop() const
246 {
247 if (scope_type == loop_body)
248 return this;
249
250 if (parent_scope)
251 return parent_scope->innermost_loop();
252
253 return nullptr;
254 }
255
256 const prog_scope *prog_scope::outermost_loop() const
257 {
258 const prog_scope *loop = nullptr;
259 const prog_scope *p = this;
260
261 do {
262 if (p->type() == loop_body)
263 loop = p;
264 p = p->parent();
265 } while (p);
266
267 return loop;
268 }
269
270 const prog_scope *prog_scope::enclosing_conditional() const
271 {
272 if (is_conditional())
273 return this;
274
275 if (parent_scope)
276 return parent_scope->enclosing_conditional();
277
278 return nullptr;
279 }
280
281 bool prog_scope::contains_range_of(const prog_scope& other) const
282 {
283 return (begin() <= other.begin()) && (end() >= other.end());
284 }
285
286 bool prog_scope::is_conditional() const
287 {
288 return scope_type == if_branch ||
289 scope_type == else_branch ||
290 scope_type == switch_case_branch ||
291 scope_type == switch_default_branch;
292 }
293
294 const prog_scope *prog_scope::in_ifelse_scope() const
295 {
296 if (scope_type == if_branch ||
297 scope_type == else_branch)
298 return this;
299
300 if (parent_scope)
301 return parent_scope->in_ifelse_scope();
302
303 return nullptr;
304 }
305
306 const st_src_reg *prog_scope::switch_register() const
307 {
308 return switch_reg;
309 }
310
311 const prog_scope *prog_scope::in_switchcase_scope() const
312 {
313 if (scope_type == switch_case_branch ||
314 scope_type == switch_default_branch)
315 return this;
316
317 if (parent_scope)
318 return parent_scope->in_switchcase_scope();
319
320 return nullptr;
321 }
322
323 bool prog_scope::break_is_for_switchcase() const
324 {
325 if (scope_type == loop_body)
326 return false;
327
328 if (scope_type == switch_case_branch ||
329 scope_type == switch_default_branch ||
330 scope_type == switch_body)
331 return true;
332
333 if (parent_scope)
334 return parent_scope->break_is_for_switchcase();
335
336 return false;
337 }
338
339 int prog_scope::id() const
340 {
341 return scope_id;
342 }
343
344 int prog_scope::begin() const
345 {
346 return scope_begin;
347 }
348
349 int prog_scope::end() const
350 {
351 return scope_end;
352 }
353
354 void prog_scope::set_end(int end)
355 {
356 if (scope_end == -1)
357 scope_end = end;
358 }
359
360 void prog_scope::set_loop_break_line(int line)
361 {
362 if (scope_type == loop_body) {
363 break_loop_line = MIN2(break_loop_line, line);
364 } else {
365 if (parent_scope)
366 parent()->set_loop_break_line(line);
367 }
368 }
369
370 int prog_scope::loop_break_line() const
371 {
372 return break_loop_line;
373 }
374
375 temp_access::temp_access():
376 access_mask(0),
377 needs_component_tracking(false)
378 {
379 }
380
381 void temp_access::update_access_mask(int mask)
382 {
383 if (access_mask && access_mask != mask)
384 needs_component_tracking = true;
385 access_mask |= mask;
386 }
387
388 void temp_access::record_write(int line, prog_scope *scope, int writemask)
389 {
390 update_access_mask(writemask);
391
392 if (writemask & WRITEMASK_X)
393 comp[0].record_write(line, scope);
394 if (writemask & WRITEMASK_Y)
395 comp[1].record_write(line, scope);
396 if (writemask & WRITEMASK_Z)
397 comp[2].record_write(line, scope);
398 if (writemask & WRITEMASK_W)
399 comp[3].record_write(line, scope);
400 }
401
402 void temp_access::record_read(int line, prog_scope *scope, int swizzle)
403 {
404 int readmask = 0;
405 for (int idx = 0; idx < 4; ++idx) {
406 int swz = GET_SWZ(swizzle, idx);
407 readmask |= (1 << swz) & 0xF;
408 }
409 update_access_mask(readmask);
410
411 if (readmask & WRITEMASK_X)
412 comp[0].record_read(line, scope);
413 if (readmask & WRITEMASK_Y)
414 comp[1].record_read(line, scope);
415 if (readmask & WRITEMASK_Z)
416 comp[2].record_read(line, scope);
417 if (readmask & WRITEMASK_W)
418 comp[3].record_read(line, scope);
419 }
420
421 inline static lifetime make_lifetime(int b, int e)
422 {
423 lifetime lt;
424 lt.begin = b;
425 lt.end = e;
426 return lt;
427 }
428
429 lifetime temp_access::get_required_lifetime()
430 {
431 lifetime result = make_lifetime(-1, -1);
432
433 unsigned mask = access_mask;
434 while (mask) {
435 unsigned chan = u_bit_scan(&mask);
436 lifetime lt = comp[chan].get_required_lifetime();
437
438 if (lt.begin >= 0) {
439 if ((result.begin < 0) || (result.begin > lt.begin))
440 result.begin = lt.begin;
441 }
442
443 if (lt.end > result.end)
444 result.end = lt.end;
445
446 if (!needs_component_tracking)
447 break;
448 }
449 return result;
450 }
451
452 temp_comp_access::temp_comp_access():
453 last_read_scope(nullptr),
454 first_read_scope(nullptr),
455 first_write_scope(nullptr),
456 first_write(-1),
457 last_read(-1),
458 last_write(-1),
459 first_read(numeric_limits<int>::max())
460 {
461 }
462
463 void temp_comp_access::record_read(int line, prog_scope *scope)
464 {
465 last_read_scope = scope;
466 last_read = line;
467
468 if (first_read > line) {
469 first_read = line;
470 first_read_scope = scope;
471 }
472 }
473
474 void temp_comp_access::record_write(int line, prog_scope *scope)
475 {
476 last_write = line;
477
478 if (first_write < 0) {
479 first_write = line;
480 first_write_scope = scope;
481 }
482 }
483
484 void temp_comp_access::propagate_lifetime_to_dominant_write_scope()
485 {
486 first_write = first_write_scope->begin();
487 int lr = first_write_scope->end();
488
489 if (last_read < lr)
490 last_read = lr;
491 }
492
493 lifetime temp_comp_access::get_required_lifetime()
494 {
495 bool keep_for_full_loop = false;
496
497 /* This register component is not used at all, or only read,
498 * mark it as unused and ignore it when renaming.
499 * glsl_to_tgsi_visitor::renumber_registers will take care of
500 * eliminating registers that are not written to.
501 */
502 if (last_write < 0)
503 return make_lifetime(-1, -1);
504
505 assert(first_write_scope);
506
507 /* Only written to, just make sure the register component is not
508 * reused in the range it is used to write to
509 */
510 if (!last_read_scope)
511 return make_lifetime(first_write, last_write + 1);
512
513 const prog_scope *enclosing_scope_first_read = first_read_scope;
514 const prog_scope *enclosing_scope_first_write = first_write_scope;
515
516 /* We read before writing in a loop
517 * hence the value must survive the loops
518 */
519 if ((first_read <= first_write) &&
520 first_read_scope->is_in_loop()) {
521 keep_for_full_loop = true;
522 enclosing_scope_first_read = first_read_scope->outermost_loop();
523 }
524
525 /* A conditional write within a nested loop must survive
526 * the outermost loop, but only if it is read outside
527 * the condition scope where we write.
528 */
529 const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional();
530 if (conditional && conditional->is_in_loop() &&
531 !conditional->contains_range_of(*last_read_scope)) {
532 keep_for_full_loop = true;
533 enclosing_scope_first_write = conditional->outermost_loop();
534 }
535
536 /* Evaluate the scope that is shared by all: required first write scope,
537 * required first read before write scope, and last read scope.
538 */
539 const prog_scope *enclosing_scope = enclosing_scope_first_read;
540 if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
541 enclosing_scope = enclosing_scope_first_write;
542
543 if (last_read_scope->contains_range_of(*enclosing_scope))
544 enclosing_scope = last_read_scope;
545
546 while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
547 !enclosing_scope->contains_range_of(*last_read_scope)) {
548 enclosing_scope = enclosing_scope->parent();
549 assert(enclosing_scope);
550 }
551
552 /* Propagate the last read scope to the target scope */
553 while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
554 /* If the read is in a loop and we have to move up the scope we need to
555 * extend the life time to the end of this current loop because at this
556 * point we don't know whether the component was written before
557 * un-conditionally in the same loop.
558 */
559 if (last_read_scope->is_loop())
560 last_read = last_read_scope->end();
561
562 last_read_scope = last_read_scope->parent();
563 }
564
565 /* If the variable has to be kept for the whole loop, and we
566 * are currently in a loop, then propagate the life time.
567 */
568 if (keep_for_full_loop && first_write_scope->is_loop())
569 propagate_lifetime_to_dominant_write_scope();
570
571 /* Propagate the first_dominant_write scope to the target scope */
572 while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
573 /* Propagate lifetime if there was a break in a loop and the write was
574 * after the break inside that loop. Note, that this is only needed if
575 * we move up in the scopes.
576 */
577 if (first_write_scope->loop_break_line() < first_write) {
578 keep_for_full_loop = true;
579 propagate_lifetime_to_dominant_write_scope();
580 }
581
582 first_write_scope = first_write_scope->parent();
583
584 /* Propagte lifetime if we are now in a loop */
585 if (keep_for_full_loop && first_write_scope->is_loop())
586 propagate_lifetime_to_dominant_write_scope();
587 }
588
589 /* The last write past the last read is dead code, but we have to
590 * ensure that the component is not reused too early, hence extend the
591 * lifetime past the last write.
592 */
593 if (last_write >= last_read)
594 last_read = last_write + 1;
595
596 /* Here we are at the same scope, all is resolved */
597 return make_lifetime(first_write, last_read);
598 }
599
600 /* Helper class for sorting and searching the registers based
601 * on life times. */
602 class access_record {
603 public:
604 int begin;
605 int end;
606 int reg;
607 bool erase;
608
609 bool operator < (const access_record& rhs) const {
610 return begin < rhs.begin;
611 }
612 };
613
614 }
615
616 #ifndef NDEBUG
617 /* Function used for debugging. */
618 static void dump_instruction(int line, prog_scope *scope,
619 const glsl_to_tgsi_instruction& inst);
620 #endif
621
622 /* Scan the program and estimate the required register life times.
623 * The array lifetimes must be pre-allocated
624 */
625 bool
626 get_temp_registers_required_lifetimes(void *mem_ctx, exec_list *instructions,
627 int ntemps, struct lifetime *lifetimes)
628 {
629 int line = 0;
630 int loop_id = 0;
631 int if_id = 0;
632 int switch_id = 0;
633 bool is_at_end = false;
634 int n_scopes = 1;
635
636 /* Count scopes to allocate the needed space without the need for
637 * re-allocation
638 */
639 foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
640 if (inst->op == TGSI_OPCODE_BGNLOOP ||
641 inst->op == TGSI_OPCODE_SWITCH ||
642 inst->op == TGSI_OPCODE_CASE ||
643 inst->op == TGSI_OPCODE_IF ||
644 inst->op == TGSI_OPCODE_UIF ||
645 inst->op == TGSI_OPCODE_ELSE ||
646 inst->op == TGSI_OPCODE_DEFAULT)
647 ++n_scopes;
648 }
649
650 prog_scope_storage scopes(mem_ctx, n_scopes);
651 temp_access *acc = new temp_access[ntemps];
652
653 prog_scope *cur_scope = scopes.create(nullptr, outer_scope, 0, 0, line);
654
655 RENAME_DEBUG(cerr << "========= Begin shader ============\n");
656
657 foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
658 if (is_at_end) {
659 assert(!"GLSL_TO_TGSI: shader has instructions past end marker");
660 break;
661 }
662
663 RENAME_DEBUG(dump_instruction(line, cur_scope, *inst));
664
665 switch (inst->op) {
666 case TGSI_OPCODE_BGNLOOP: {
667 cur_scope = scopes.create(cur_scope, loop_body, loop_id++,
668 cur_scope->nesting_depth() + 1, line);
669 break;
670 }
671 case TGSI_OPCODE_ENDLOOP: {
672 cur_scope->set_end(line);
673 cur_scope = cur_scope->parent();
674 assert(cur_scope);
675 break;
676 }
677 case TGSI_OPCODE_IF:
678 case TGSI_OPCODE_UIF: {
679 assert(num_inst_src_regs(inst) == 1);
680 const st_src_reg& src = inst->src[0];
681 if (src.file == PROGRAM_TEMPORARY)
682 acc[src.index].record_read(line, cur_scope, src.swizzle);
683 cur_scope = scopes.create(cur_scope, if_branch, if_id++,
684 cur_scope->nesting_depth() + 1, line + 1);
685 break;
686 }
687 case TGSI_OPCODE_ELSE: {
688 assert(cur_scope->type() == if_branch);
689 cur_scope->set_end(line - 1);
690 cur_scope = scopes.create(cur_scope->parent(), else_branch,
691 cur_scope->id(), cur_scope->nesting_depth(),
692 line + 1);
693 break;
694 }
695 case TGSI_OPCODE_END: {
696 cur_scope->set_end(line);
697 is_at_end = true;
698 break;
699 }
700 case TGSI_OPCODE_ENDIF: {
701 cur_scope->set_end(line - 1);
702 cur_scope = cur_scope->parent();
703 assert(cur_scope);
704 break;
705 }
706 case TGSI_OPCODE_SWITCH: {
707 assert(num_inst_src_regs(inst) == 1);
708 const st_src_reg& src = inst->src[0];
709 prog_scope *scope = scopes.create(cur_scope, switch_body, switch_id++,
710 cur_scope->nesting_depth() + 1, line);
711 /* We record the read only for the SWITCH statement itself, like it
712 * is used by the only consumer of TGSI_OPCODE_SWITCH in tgsi_exec.c.
713 */
714 if (src.file == PROGRAM_TEMPORARY)
715 acc[src.index].record_read(line, cur_scope, src.swizzle);
716 cur_scope = scope;
717 break;
718 }
719 case TGSI_OPCODE_ENDSWITCH: {
720 cur_scope->set_end(line - 1);
721 /* Remove the case level, it might not have been
722 * closed with a break.
723 */
724 if (cur_scope->type() != switch_body)
725 cur_scope = cur_scope->parent();
726
727 cur_scope = cur_scope->parent();
728 assert(cur_scope);
729 break;
730 }
731 case TGSI_OPCODE_CASE: {
732 /* Take care of tracking the registers. */
733 prog_scope *switch_scope = cur_scope->type() == switch_body ?
734 cur_scope : cur_scope->parent();
735
736 assert(num_inst_src_regs(inst) == 1);
737 const st_src_reg& src = inst->src[0];
738 if (src.file == PROGRAM_TEMPORARY)
739 acc[src.index].record_read(line, switch_scope, src.swizzle);
740
741 /* Fall through to allocate the scope. */
742 }
743 case TGSI_OPCODE_DEFAULT: {
744 prog_scope_type t = inst->op == TGSI_OPCODE_CASE ? switch_case_branch
745 : switch_default_branch;
746 prog_scope *switch_scope = (cur_scope->type() == switch_body) ?
747 cur_scope : cur_scope->parent();
748 assert(switch_scope->type() == switch_body);
749 prog_scope *scope = scopes.create(switch_scope, t,
750 switch_scope->id(),
751 switch_scope->nesting_depth() + 1,
752 line);
753 /* Previous case falls through, so scope was not yet closed. */
754 if ((cur_scope != switch_scope) && (cur_scope->end() == -1))
755 cur_scope->set_end(line - 1);
756 cur_scope = scope;
757 break;
758 }
759 case TGSI_OPCODE_BRK: {
760 if (cur_scope->break_is_for_switchcase()) {
761 cur_scope->set_end(line - 1);
762 } else {
763 cur_scope->set_loop_break_line(line);
764 }
765 break;
766 }
767 case TGSI_OPCODE_CAL:
768 case TGSI_OPCODE_RET:
769 /* These opcodes are not supported and if a subroutine would
770 * be called in a shader, then the lifetime tracking would have
771 * to follow that call to see which registers are used there.
772 * Since this is not done, we have to bail out here and signal
773 * that no register merge will take place.
774 */
775 return false;
776 default: {
777 for (unsigned j = 0; j < num_inst_src_regs(inst); j++) {
778 const st_src_reg& src = inst->src[j];
779 if (src.file == PROGRAM_TEMPORARY)
780 acc[src.index].record_read(line, cur_scope, src.swizzle);
781 }
782 for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) {
783 const st_src_reg& src = inst->tex_offsets[j];
784 if (src.file == PROGRAM_TEMPORARY)
785 acc[src.index].record_read(line, cur_scope, src.swizzle);
786 }
787 for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
788 const st_dst_reg& dst = inst->dst[j];
789 if (dst.file == PROGRAM_TEMPORARY)
790 acc[dst.index].record_write(line, cur_scope, dst.writemask);
791 }
792 }
793 }
794 ++line;
795 }
796
797 RENAME_DEBUG(cerr << "==================================\n\n");
798
799 /* Make sure last scope is closed, even though no
800 * TGSI_OPCODE_END was given.
801 */
802 if (cur_scope->end() < 0)
803 cur_scope->set_end(line - 1);
804
805 RENAME_DEBUG(cerr << "========= lifetimes ==============\n");
806 for(int i = 0; i < ntemps; ++i) {
807 RENAME_DEBUG(cerr << setw(4) << i);
808 lifetimes[i] = acc[i].get_required_lifetime();
809 RENAME_DEBUG(cerr << ": [" << lifetimes[i].begin << ", "
810 << lifetimes[i].end << "]\n");
811 }
812 RENAME_DEBUG(cerr << "==================================\n\n");
813
814 delete[] acc;
815 return true;
816 }
817
818 /* Find the next register between [start, end) that has a life time starting
819 * at or after bound by using a binary search.
820 * start points at the beginning of the search range,
821 * end points at the element past the end of the search range, and
822 * the array comprising [start, end) must be sorted in ascending order.
823 */
824 static access_record*
825 find_next_rename(access_record* start, access_record* end, int bound)
826 {
827 int delta = (end - start);
828
829 while (delta > 0) {
830 int half = delta >> 1;
831 access_record* middle = start + half;
832
833 if (bound <= middle->begin) {
834 delta = half;
835 } else {
836 start = middle;
837 ++start;
838 delta -= half + 1;
839 }
840 }
841
842 return start;
843 }
844
845 #ifndef USE_STL_SORT
846 static int access_record_compare (const void *a, const void *b) {
847 const access_record *aa = static_cast<const access_record*>(a);
848 const access_record *bb = static_cast<const access_record*>(b);
849 return aa->begin < bb->begin ? -1 : (aa->begin > bb->begin ? 1 : 0);
850 }
851 #endif
852
853 /* This functions evaluates the register merges by using a binary
854 * search to find suitable merge candidates. */
855 void get_temp_registers_remapping(void *mem_ctx, int ntemps,
856 const struct lifetime* lifetimes,
857 struct rename_reg_pair *result)
858 {
859 access_record *reg_access = ralloc_array(mem_ctx, access_record, ntemps);
860
861 int used_temps = 0;
862 for (int i = 0; i < ntemps; ++i) {
863 if (lifetimes[i].begin >= 0) {
864 reg_access[used_temps].begin = lifetimes[i].begin;
865 reg_access[used_temps].end = lifetimes[i].end;
866 reg_access[used_temps].reg = i;
867 reg_access[used_temps].erase = false;
868 ++used_temps;
869 }
870 }
871
872 #ifdef USE_STL_SORT
873 std::sort(reg_access, reg_access + used_temps);
874 #else
875 std::qsort(reg_access, used_temps, sizeof(access_record), access_record_compare);
876 #endif
877
878 access_record *trgt = reg_access;
879 access_record *reg_access_end = reg_access + used_temps;
880 access_record *first_erase = reg_access_end;
881 access_record *search_start = trgt + 1;
882
883 while (trgt != reg_access_end) {
884 access_record *src = find_next_rename(search_start, reg_access_end,
885 trgt->end);
886 if (src != reg_access_end) {
887 result[src->reg].new_reg = trgt->reg;
888 result[src->reg].valid = true;
889 trgt->end = src->end;
890
891 /* Since we only search forward, don't remove the renamed
892 * register just now, only mark it. */
893 src->erase = true;
894
895 if (first_erase == reg_access_end)
896 first_erase = src;
897
898 search_start = src + 1;
899 } else {
900 /* Moving to the next target register it is time to remove
901 * the already merged registers from the search range */
902 if (first_erase != reg_access_end) {
903 access_record *outp = first_erase;
904 access_record *inp = first_erase + 1;
905
906 while (inp != reg_access_end) {
907 if (!inp->erase)
908 *outp++ = *inp;
909 ++inp;
910 }
911
912 reg_access_end = outp;
913 first_erase = reg_access_end;
914 }
915 ++trgt;
916 search_start = trgt + 1;
917 }
918 }
919 ralloc_free(reg_access);
920 }
921
922 /* Code below used for debugging */
923 #ifndef NDEBUG
924 static const char swizzle_txt[] = "xyzw";
925
926 static const char *tgsi_file_names[PROGRAM_FILE_MAX] = {
927 "TEMP", "ARRAY", "IN", "OUT", "STATE", "CONST",
928 "UNIFORM", "WO", "ADDR", "SAMPLER", "SV", "UNDEF",
929 "IMM", "BUF", "MEM", "IMAGE"
930 };
931
932 static
933 void dump_instruction(int line, prog_scope *scope,
934 const glsl_to_tgsi_instruction& inst)
935 {
936 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst.op);
937
938 int indent = scope->nesting_depth();
939 if ((scope->type() == switch_case_branch ||
940 scope->type() == switch_default_branch) &&
941 (info->opcode == TGSI_OPCODE_CASE ||
942 info->opcode == TGSI_OPCODE_DEFAULT))
943 --indent;
944
945 if (info->opcode == TGSI_OPCODE_ENDIF ||
946 info->opcode == TGSI_OPCODE_ELSE ||
947 info->opcode == TGSI_OPCODE_ENDLOOP ||
948 info->opcode == TGSI_OPCODE_ENDSWITCH)
949 --indent;
950
951 cerr << setw(4) << line << ": ";
952 for (int i = 0; i < indent; ++i)
953 cerr << " ";
954 cerr << tgsi_get_opcode_name(info->opcode) << " ";
955
956 bool has_operators = false;
957 for (unsigned j = 0; j < num_inst_dst_regs(&inst); j++) {
958 has_operators = true;
959 if (j > 0)
960 cerr << ", ";
961
962 const st_dst_reg& dst = inst.dst[j];
963 cerr << tgsi_file_names[dst.file];
964
965 if (dst.file == PROGRAM_ARRAY)
966 cerr << "(" << dst.array_id << ")";
967
968 cerr << "[" << dst.index << "]";
969
970 if (dst.writemask != TGSI_WRITEMASK_XYZW) {
971 cerr << ".";
972 if (dst.writemask & TGSI_WRITEMASK_X) cerr << "x";
973 if (dst.writemask & TGSI_WRITEMASK_Y) cerr << "y";
974 if (dst.writemask & TGSI_WRITEMASK_Z) cerr << "z";
975 if (dst.writemask & TGSI_WRITEMASK_W) cerr << "w";
976 }
977 }
978 if (has_operators)
979 cerr << " := ";
980
981 for (unsigned j = 0; j < num_inst_src_regs(&inst); j++) {
982 if (j > 0)
983 cerr << ", ";
984
985 const st_src_reg& src = inst.src[j];
986 cerr << tgsi_file_names[src.file]
987 << "[" << src.index << "]";
988 if (src.swizzle != SWIZZLE_XYZW) {
989 cerr << ".";
990 for (int idx = 0; idx < 4; ++idx) {
991 int swz = GET_SWZ(src.swizzle, idx);
992 if (swz < 4) {
993 cerr << swizzle_txt[swz];
994 }
995 }
996 }
997 }
998
999 if (inst.tex_offset_num_offset > 0) {
1000 cerr << ", TEXOFS: ";
1001 for (unsigned j = 0; j < inst.tex_offset_num_offset; j++) {
1002 if (j > 0)
1003 cerr << ", ";
1004
1005 const st_src_reg& src = inst.tex_offsets[j];
1006 cerr << tgsi_file_names[src.file]
1007 << "[" << src.index << "]";
1008 if (src.swizzle != SWIZZLE_XYZW) {
1009 cerr << ".";
1010 for (int idx = 0; idx < 4; ++idx) {
1011 int swz = GET_SWZ(src.swizzle, idx);
1012 if (swz < 4) {
1013 cerr << swizzle_txt[swz];
1014 }
1015 }
1016 }
1017 }
1018 }
1019 cerr << "\n";
1020 }
1021 #endif