st/mesa: use R10G10B10X2 format where applicable
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi_temprename.cpp
1 /*
2 * Copyright © 2017 Gert Wollny
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "st_glsl_to_tgsi_temprename.h"
25 #include <tgsi/tgsi_info.h>
26 #include <tgsi/tgsi_strings.h>
27 #include <program/prog_instruction.h>
28 #include <limits>
29 #include <cstdlib>
30
31 /* std::sort is significantly faster than qsort */
32 #define USE_STL_SORT
33 #ifdef USE_STL_SORT
34 #include <algorithm>
35 #endif
36
37 #ifndef NDEBUG
38 #include <iostream>
39 #include <iomanip>
40 #include <program/prog_print.h>
41 #include <util/debug.h>
42 using std::cerr;
43 using std::setw;
44 #endif
45
46 /* If <windows.h> is included this is defined and clashes with
47 * std::numeric_limits<>::max()
48 */
49 #ifdef max
50 #undef max
51 #endif
52
53 using std::numeric_limits;
54
55 /* Without c++11 define the nullptr for forward-compatibility
56 * and better readibility */
57 #if __cplusplus < 201103L
58 #define nullptr 0
59 #endif
60
61 #ifndef NDEBUG
62 /* Helper function to check whether we want to seen debugging output */
63 static inline bool is_debug_enabled ()
64 {
65 static int debug_enabled = -1;
66 if (debug_enabled < 0)
67 debug_enabled = env_var_as_boolean("GLSL_TO_TGSI_RENAME_DEBUG", false);
68 return debug_enabled > 0;
69 }
70 #define RENAME_DEBUG(X) if (is_debug_enabled()) do { X; } while (false);
71 #else
72 #define RENAME_DEBUG(X)
73 #endif
74
75 namespace {
76
77 enum prog_scope_type {
78 outer_scope, /* Outer program scope */
79 loop_body, /* Inside a loop */
80 if_branch, /* Inside if branch */
81 else_branch, /* Inside else branch */
82 switch_body, /* Inside switch statmenet */
83 switch_case_branch, /* Inside switch case statmenet */
84 switch_default_branch, /* Inside switch default statmenet */
85 undefined_scope
86 };
87
88 class prog_scope {
89 public:
90 prog_scope(prog_scope *parent, prog_scope_type type, int id,
91 int depth, int begin);
92
93 prog_scope_type type() const;
94 prog_scope *parent() const;
95 int nesting_depth() const;
96 int id() const;
97 int end() const;
98 int begin() const;
99 int loop_break_line() const;
100
101 const prog_scope *in_ifelse_scope() const;
102 const prog_scope *in_switchcase_scope() const;
103 const prog_scope *innermost_loop() const;
104 const prog_scope *outermost_loop() const;
105 const prog_scope *enclosing_conditional() const;
106
107 bool is_loop() const;
108 bool is_in_loop() const;
109 bool is_conditional() const;
110
111 bool break_is_for_switchcase() const;
112 bool contains_range_of(const prog_scope& other) const;
113
114 void set_end(int end);
115 void set_loop_break_line(int line);
116
117 private:
118 prog_scope_type scope_type;
119 int scope_id;
120 int scope_nesting_depth;
121 int scope_begin;
122 int scope_end;
123 int break_loop_line;
124 prog_scope *parent_scope;
125 };
126
127 /* Some storage class to encapsulate the prog_scope (de-)allocations */
128 class prog_scope_storage {
129 public:
130 prog_scope_storage(void *mem_ctx, int n);
131 ~prog_scope_storage();
132 prog_scope * create(prog_scope *p, prog_scope_type type, int id,
133 int lvl, int s_begin);
134 private:
135 void *mem_ctx;
136 int current_slot;
137 prog_scope *storage;
138 };
139
140 class temp_comp_access {
141 public:
142 temp_comp_access();
143 void record_read(int line, prog_scope *scope);
144 void record_write(int line, prog_scope *scope);
145 lifetime get_required_lifetime();
146 private:
147 void propagate_lifetime_to_dominant_write_scope();
148
149 prog_scope *last_read_scope;
150 prog_scope *first_read_scope;
151 prog_scope *first_write_scope;
152 int first_write;
153 int last_read;
154 int last_write;
155 int first_read;
156 bool keep_for_full_loop;
157 };
158
159 class temp_access {
160 public:
161 temp_access();
162 void record_read(int line, prog_scope *scope, int swizzle);
163 void record_write(int line, prog_scope *scope, int writemask);
164 lifetime get_required_lifetime();
165 private:
166 void update_access_mask(int mask);
167
168 temp_comp_access comp[4];
169 int access_mask;
170 bool needs_component_tracking;
171 };
172
173 prog_scope_storage::prog_scope_storage(void *mc, int n):
174 mem_ctx(mc),
175 current_slot(0)
176 {
177 storage = ralloc_array(mem_ctx, prog_scope, n);
178 }
179
180 prog_scope_storage::~prog_scope_storage()
181 {
182 ralloc_free(storage);
183 }
184
185 prog_scope*
186 prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id,
187 int lvl, int s_begin)
188 {
189 storage[current_slot] = prog_scope(p, type, id, lvl, s_begin);
190 return &storage[current_slot++];
191 }
192
193 prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id,
194 int depth, int scope_begin):
195 scope_type(type),
196 scope_id(id),
197 scope_nesting_depth(depth),
198 scope_begin(scope_begin),
199 scope_end(-1),
200 break_loop_line(numeric_limits<int>::max()),
201 parent_scope(parent)
202 {
203 }
204
205 prog_scope_type prog_scope::type() const
206 {
207 return scope_type;
208 }
209
210 prog_scope *prog_scope::parent() const
211 {
212 return parent_scope;
213 }
214
215 int prog_scope::nesting_depth() const
216 {
217 return scope_nesting_depth;
218 }
219
220 bool prog_scope::is_loop() const
221 {
222 return (scope_type == loop_body);
223 }
224
225 bool prog_scope::is_in_loop() const
226 {
227 if (scope_type == loop_body)
228 return true;
229
230 if (parent_scope)
231 return parent_scope->is_in_loop();
232
233 return false;
234 }
235
236 const prog_scope *prog_scope::innermost_loop() const
237 {
238 if (scope_type == loop_body)
239 return this;
240
241 if (parent_scope)
242 return parent_scope->innermost_loop();
243
244 return nullptr;
245 }
246
247 const prog_scope *prog_scope::outermost_loop() const
248 {
249 const prog_scope *loop = nullptr;
250 const prog_scope *p = this;
251
252 do {
253 if (p->type() == loop_body)
254 loop = p;
255 p = p->parent();
256 } while (p);
257
258 return loop;
259 }
260
261 const prog_scope *prog_scope::enclosing_conditional() const
262 {
263 if (is_conditional())
264 return this;
265
266 if (parent_scope)
267 return parent_scope->enclosing_conditional();
268
269 return nullptr;
270 }
271
272 bool prog_scope::contains_range_of(const prog_scope& other) const
273 {
274 return (begin() <= other.begin()) && (end() >= other.end());
275 }
276
277 bool prog_scope::is_conditional() const
278 {
279 return scope_type == if_branch ||
280 scope_type == else_branch ||
281 scope_type == switch_case_branch ||
282 scope_type == switch_default_branch;
283 }
284
285 const prog_scope *prog_scope::in_ifelse_scope() const
286 {
287 if (scope_type == if_branch ||
288 scope_type == else_branch)
289 return this;
290
291 if (parent_scope)
292 return parent_scope->in_ifelse_scope();
293
294 return nullptr;
295 }
296
297 const prog_scope *prog_scope::in_switchcase_scope() const
298 {
299 if (scope_type == switch_case_branch ||
300 scope_type == switch_default_branch)
301 return this;
302
303 if (parent_scope)
304 return parent_scope->in_switchcase_scope();
305
306 return nullptr;
307 }
308
309 bool prog_scope::break_is_for_switchcase() const
310 {
311 if (scope_type == loop_body)
312 return false;
313
314 if (scope_type == switch_case_branch ||
315 scope_type == switch_default_branch ||
316 scope_type == switch_body)
317 return true;
318
319 if (parent_scope)
320 return parent_scope->break_is_for_switchcase();
321
322 return false;
323 }
324
325 int prog_scope::id() const
326 {
327 return scope_id;
328 }
329
330 int prog_scope::begin() const
331 {
332 return scope_begin;
333 }
334
335 int prog_scope::end() const
336 {
337 return scope_end;
338 }
339
340 void prog_scope::set_end(int end)
341 {
342 if (scope_end == -1)
343 scope_end = end;
344 }
345
346 void prog_scope::set_loop_break_line(int line)
347 {
348 if (scope_type == loop_body) {
349 break_loop_line = MIN2(break_loop_line, line);
350 } else {
351 if (parent_scope)
352 parent()->set_loop_break_line(line);
353 }
354 }
355
356 int prog_scope::loop_break_line() const
357 {
358 return break_loop_line;
359 }
360
361 temp_access::temp_access():
362 access_mask(0),
363 needs_component_tracking(false)
364 {
365 }
366
367 void temp_access::update_access_mask(int mask)
368 {
369 if (access_mask && access_mask != mask)
370 needs_component_tracking = true;
371 access_mask |= mask;
372 }
373
374 void temp_access::record_write(int line, prog_scope *scope, int writemask)
375 {
376 update_access_mask(writemask);
377
378 if (writemask & WRITEMASK_X)
379 comp[0].record_write(line, scope);
380 if (writemask & WRITEMASK_Y)
381 comp[1].record_write(line, scope);
382 if (writemask & WRITEMASK_Z)
383 comp[2].record_write(line, scope);
384 if (writemask & WRITEMASK_W)
385 comp[3].record_write(line, scope);
386 }
387
388 void temp_access::record_read(int line, prog_scope *scope, int swizzle)
389 {
390 int readmask = 0;
391 for (int idx = 0; idx < 4; ++idx) {
392 int swz = GET_SWZ(swizzle, idx);
393 readmask |= (1 << swz) & 0xF;
394 }
395 update_access_mask(readmask);
396
397 if (readmask & WRITEMASK_X)
398 comp[0].record_read(line, scope);
399 if (readmask & WRITEMASK_Y)
400 comp[1].record_read(line, scope);
401 if (readmask & WRITEMASK_Z)
402 comp[2].record_read(line, scope);
403 if (readmask & WRITEMASK_W)
404 comp[3].record_read(line, scope);
405 }
406
407 inline static lifetime make_lifetime(int b, int e)
408 {
409 lifetime lt;
410 lt.begin = b;
411 lt.end = e;
412 return lt;
413 }
414
415 lifetime temp_access::get_required_lifetime()
416 {
417 lifetime result = make_lifetime(-1, -1);
418
419 unsigned mask = access_mask;
420 while (mask) {
421 unsigned chan = u_bit_scan(&mask);
422 lifetime lt = comp[chan].get_required_lifetime();
423
424 if (lt.begin >= 0) {
425 if ((result.begin < 0) || (result.begin > lt.begin))
426 result.begin = lt.begin;
427 }
428
429 if (lt.end > result.end)
430 result.end = lt.end;
431
432 if (!needs_component_tracking)
433 break;
434 }
435 return result;
436 }
437
438 temp_comp_access::temp_comp_access():
439 last_read_scope(nullptr),
440 first_read_scope(nullptr),
441 first_write_scope(nullptr),
442 first_write(-1),
443 last_read(-1),
444 last_write(-1),
445 first_read(numeric_limits<int>::max())
446 {
447 }
448
449 void temp_comp_access::record_read(int line, prog_scope *scope)
450 {
451 last_read_scope = scope;
452 last_read = line;
453
454 if (first_read > line) {
455 first_read = line;
456 first_read_scope = scope;
457 }
458 }
459
460 void temp_comp_access::record_write(int line, prog_scope *scope)
461 {
462 last_write = line;
463
464 if (first_write < 0) {
465 first_write = line;
466 first_write_scope = scope;
467 }
468 }
469
470 void temp_comp_access::propagate_lifetime_to_dominant_write_scope()
471 {
472 first_write = first_write_scope->begin();
473 int lr = first_write_scope->end();
474
475 if (last_read < lr)
476 last_read = lr;
477 }
478
479 lifetime temp_comp_access::get_required_lifetime()
480 {
481 bool keep_for_full_loop = false;
482
483 /* This register component is not used at all, or only read,
484 * mark it as unused and ignore it when renaming.
485 * glsl_to_tgsi_visitor::renumber_registers will take care of
486 * eliminating registers that are not written to.
487 */
488 if (last_write < 0)
489 return make_lifetime(-1, -1);
490
491 assert(first_write_scope);
492
493 /* Only written to, just make sure the register component is not
494 * reused in the range it is used to write to
495 */
496 if (!last_read_scope)
497 return make_lifetime(first_write, last_write + 1);
498
499 const prog_scope *enclosing_scope_first_read = first_read_scope;
500 const prog_scope *enclosing_scope_first_write = first_write_scope;
501
502 /* We read before writing in a loop
503 * hence the value must survive the loops
504 */
505 if ((first_read <= first_write) &&
506 first_read_scope->is_in_loop()) {
507 keep_for_full_loop = true;
508 enclosing_scope_first_read = first_read_scope->outermost_loop();
509 }
510
511 /* A conditional write within a nested loop must survive
512 * the outermost loop, but only if it is read outside
513 * the condition scope where we write.
514 */
515 const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional();
516 if (conditional && conditional->is_in_loop() &&
517 !conditional->contains_range_of(*last_read_scope)) {
518 keep_for_full_loop = true;
519 enclosing_scope_first_write = conditional->outermost_loop();
520 }
521
522 /* Evaluate the scope that is shared by all: required first write scope,
523 * required first read before write scope, and last read scope.
524 */
525 const prog_scope *enclosing_scope = enclosing_scope_first_read;
526 if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
527 enclosing_scope = enclosing_scope_first_write;
528
529 if (last_read_scope->contains_range_of(*enclosing_scope))
530 enclosing_scope = last_read_scope;
531
532 while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
533 !enclosing_scope->contains_range_of(*last_read_scope)) {
534 enclosing_scope = enclosing_scope->parent();
535 assert(enclosing_scope);
536 }
537
538 /* Propagate the last read scope to the target scope */
539 while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
540 /* If the read is in a loop and we have to move up the scope we need to
541 * extend the life time to the end of this current loop because at this
542 * point we don't know whether the component was written before
543 * un-conditionally in the same loop.
544 */
545 if (last_read_scope->is_loop())
546 last_read = last_read_scope->end();
547
548 last_read_scope = last_read_scope->parent();
549 }
550
551 /* If the variable has to be kept for the whole loop, and we
552 * are currently in a loop, then propagate the life time.
553 */
554 if (keep_for_full_loop && first_write_scope->is_loop())
555 propagate_lifetime_to_dominant_write_scope();
556
557 /* Propagate the first_dominant_write scope to the target scope */
558 while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
559 /* Propagate lifetime if there was a break in a loop and the write was
560 * after the break inside that loop. Note, that this is only needed if
561 * we move up in the scopes.
562 */
563 if (first_write_scope->loop_break_line() < first_write) {
564 keep_for_full_loop = true;
565 propagate_lifetime_to_dominant_write_scope();
566 }
567
568 first_write_scope = first_write_scope->parent();
569
570 /* Propagte lifetime if we are now in a loop */
571 if (keep_for_full_loop && first_write_scope->is_loop())
572 propagate_lifetime_to_dominant_write_scope();
573 }
574
575 /* The last write past the last read is dead code, but we have to
576 * ensure that the component is not reused too early, hence extend the
577 * lifetime past the last write.
578 */
579 if (last_write >= last_read)
580 last_read = last_write + 1;
581
582 /* Here we are at the same scope, all is resolved */
583 return make_lifetime(first_write, last_read);
584 }
585
586 /* Helper class for sorting and searching the registers based
587 * on life times. */
588 class access_record {
589 public:
590 int begin;
591 int end;
592 int reg;
593 bool erase;
594
595 bool operator < (const access_record& rhs) const {
596 return begin < rhs.begin;
597 }
598 };
599
600 }
601
602 #ifndef NDEBUG
603 /* Function used for debugging. */
604 static void dump_instruction(int line, prog_scope *scope,
605 const glsl_to_tgsi_instruction& inst);
606 #endif
607
608 /* Scan the program and estimate the required register life times.
609 * The array lifetimes must be pre-allocated
610 */
611 bool
612 get_temp_registers_required_lifetimes(void *mem_ctx, exec_list *instructions,
613 int ntemps, struct lifetime *lifetimes)
614 {
615 int line = 0;
616 int loop_id = 0;
617 int if_id = 0;
618 int switch_id = 0;
619 bool is_at_end = false;
620 bool ok = true;
621 int n_scopes = 1;
622
623 /* Count scopes to allocate the needed space without the need for
624 * re-allocation
625 */
626 foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
627 if (inst->op == TGSI_OPCODE_BGNLOOP ||
628 inst->op == TGSI_OPCODE_SWITCH ||
629 inst->op == TGSI_OPCODE_CASE ||
630 inst->op == TGSI_OPCODE_IF ||
631 inst->op == TGSI_OPCODE_UIF ||
632 inst->op == TGSI_OPCODE_ELSE ||
633 inst->op == TGSI_OPCODE_DEFAULT)
634 ++n_scopes;
635 }
636
637 prog_scope_storage scopes(mem_ctx, n_scopes);
638 temp_access *acc = new temp_access[ntemps];
639
640 prog_scope *cur_scope = scopes.create(nullptr, outer_scope, 0, 0, line);
641
642 RENAME_DEBUG(cerr << "========= Begin shader ============\n");
643
644 foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
645 if (is_at_end) {
646 assert(!"GLSL_TO_TGSI: shader has instructions past end marker");
647 break;
648 }
649
650 RENAME_DEBUG(dump_instruction(line, cur_scope, *inst));
651
652 switch (inst->op) {
653 case TGSI_OPCODE_BGNLOOP: {
654 cur_scope = scopes.create(cur_scope, loop_body, loop_id++,
655 cur_scope->nesting_depth() + 1, line);
656 break;
657 }
658 case TGSI_OPCODE_ENDLOOP: {
659 cur_scope->set_end(line);
660 cur_scope = cur_scope->parent();
661 assert(cur_scope);
662 break;
663 }
664 case TGSI_OPCODE_IF:
665 case TGSI_OPCODE_UIF: {
666 assert(num_inst_src_regs(inst) == 1);
667 const st_src_reg& src = inst->src[0];
668 if (src.file == PROGRAM_TEMPORARY)
669 acc[src.index].record_read(line, cur_scope, src.swizzle);
670 cur_scope = scopes.create(cur_scope, if_branch, if_id++,
671 cur_scope->nesting_depth() + 1, line + 1);
672 break;
673 }
674 case TGSI_OPCODE_ELSE: {
675 assert(cur_scope->type() == if_branch);
676 cur_scope->set_end(line - 1);
677 cur_scope = scopes.create(cur_scope->parent(), else_branch,
678 cur_scope->id(), cur_scope->nesting_depth(),
679 line + 1);
680 break;
681 }
682 case TGSI_OPCODE_END: {
683 cur_scope->set_end(line);
684 is_at_end = true;
685 break;
686 }
687 case TGSI_OPCODE_ENDIF: {
688 cur_scope->set_end(line - 1);
689 cur_scope = cur_scope->parent();
690 assert(cur_scope);
691 break;
692 }
693 case TGSI_OPCODE_SWITCH: {
694 assert(num_inst_src_regs(inst) == 1);
695 const st_src_reg& src = inst->src[0];
696 prog_scope *scope = scopes.create(cur_scope, switch_body, switch_id++,
697 cur_scope->nesting_depth() + 1, line);
698 /* We record the read only for the SWITCH statement itself, like it
699 * is used by the only consumer of TGSI_OPCODE_SWITCH in tgsi_exec.c.
700 */
701 if (src.file == PROGRAM_TEMPORARY)
702 acc[src.index].record_read(line, cur_scope, src.swizzle);
703 cur_scope = scope;
704 break;
705 }
706 case TGSI_OPCODE_ENDSWITCH: {
707 cur_scope->set_end(line - 1);
708 /* Remove the case level, it might not have been
709 * closed with a break.
710 */
711 if (cur_scope->type() != switch_body)
712 cur_scope = cur_scope->parent();
713
714 cur_scope = cur_scope->parent();
715 assert(cur_scope);
716 break;
717 }
718 case TGSI_OPCODE_CASE: {
719 /* Take care of tracking the registers. */
720 prog_scope *switch_scope = cur_scope->type() == switch_body ?
721 cur_scope : cur_scope->parent();
722
723 assert(num_inst_src_regs(inst) == 1);
724 const st_src_reg& src = inst->src[0];
725 if (src.file == PROGRAM_TEMPORARY)
726 acc[src.index].record_read(line, switch_scope, src.swizzle);
727
728 /* Fall through to allocate the scope. */
729 }
730 case TGSI_OPCODE_DEFAULT: {
731 prog_scope_type t = inst->op == TGSI_OPCODE_CASE ? switch_case_branch
732 : switch_default_branch;
733 prog_scope *switch_scope = (cur_scope->type() == switch_body) ?
734 cur_scope : cur_scope->parent();
735 assert(switch_scope->type() == switch_body);
736 prog_scope *scope = scopes.create(switch_scope, t,
737 switch_scope->id(),
738 switch_scope->nesting_depth() + 1,
739 line);
740 /* Previous case falls through, so scope was not yet closed. */
741 if ((cur_scope != switch_scope) && (cur_scope->end() == -1))
742 cur_scope->set_end(line - 1);
743 cur_scope = scope;
744 break;
745 }
746 case TGSI_OPCODE_BRK: {
747 if (cur_scope->break_is_for_switchcase()) {
748 cur_scope->set_end(line - 1);
749 } else {
750 cur_scope->set_loop_break_line(line);
751 }
752 break;
753 }
754 case TGSI_OPCODE_CAL:
755 case TGSI_OPCODE_RET:
756 /* These opcodes are not supported and if a subroutine would
757 * be called in a shader, then the lifetime tracking would have
758 * to follow that call to see which registers are used there.
759 * Since this is not done, we have to bail out here and signal
760 * that no register merge will take place.
761 */
762 ok = false;
763 goto out;
764 default: {
765 for (unsigned j = 0; j < num_inst_src_regs(inst); j++) {
766 const st_src_reg& src = inst->src[j];
767 if (src.file == PROGRAM_TEMPORARY)
768 acc[src.index].record_read(line, cur_scope, src.swizzle);
769 }
770 for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) {
771 const st_src_reg& src = inst->tex_offsets[j];
772 if (src.file == PROGRAM_TEMPORARY)
773 acc[src.index].record_read(line, cur_scope, src.swizzle);
774 }
775 for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
776 const st_dst_reg& dst = inst->dst[j];
777 if (dst.file == PROGRAM_TEMPORARY)
778 acc[dst.index].record_write(line, cur_scope, dst.writemask);
779 }
780 }
781 }
782 ++line;
783 }
784
785 RENAME_DEBUG(cerr << "==================================\n\n");
786
787 /* Make sure last scope is closed, even though no
788 * TGSI_OPCODE_END was given.
789 */
790 if (cur_scope->end() < 0)
791 cur_scope->set_end(line - 1);
792
793 RENAME_DEBUG(cerr << "========= lifetimes ==============\n");
794 for(int i = 0; i < ntemps; ++i) {
795 RENAME_DEBUG(cerr << setw(4) << i);
796 lifetimes[i] = acc[i].get_required_lifetime();
797 RENAME_DEBUG(cerr << ": [" << lifetimes[i].begin << ", "
798 << lifetimes[i].end << "]\n");
799 }
800 RENAME_DEBUG(cerr << "==================================\n\n");
801
802 out:
803 delete[] acc;
804 return ok;
805 }
806
807 /* Find the next register between [start, end) that has a life time starting
808 * at or after bound by using a binary search.
809 * start points at the beginning of the search range,
810 * end points at the element past the end of the search range, and
811 * the array comprising [start, end) must be sorted in ascending order.
812 */
813 static access_record*
814 find_next_rename(access_record* start, access_record* end, int bound)
815 {
816 int delta = (end - start);
817
818 while (delta > 0) {
819 int half = delta >> 1;
820 access_record* middle = start + half;
821
822 if (bound <= middle->begin) {
823 delta = half;
824 } else {
825 start = middle;
826 ++start;
827 delta -= half + 1;
828 }
829 }
830
831 return start;
832 }
833
834 #ifndef USE_STL_SORT
835 static int access_record_compare (const void *a, const void *b) {
836 const access_record *aa = static_cast<const access_record*>(a);
837 const access_record *bb = static_cast<const access_record*>(b);
838 return aa->begin < bb->begin ? -1 : (aa->begin > bb->begin ? 1 : 0);
839 }
840 #endif
841
842 /* This functions evaluates the register merges by using a binary
843 * search to find suitable merge candidates. */
844 void get_temp_registers_remapping(void *mem_ctx, int ntemps,
845 const struct lifetime* lifetimes,
846 struct rename_reg_pair *result)
847 {
848 access_record *reg_access = ralloc_array(mem_ctx, access_record, ntemps);
849
850 int used_temps = 0;
851 for (int i = 0; i < ntemps; ++i) {
852 if (lifetimes[i].begin >= 0) {
853 reg_access[used_temps].begin = lifetimes[i].begin;
854 reg_access[used_temps].end = lifetimes[i].end;
855 reg_access[used_temps].reg = i;
856 reg_access[used_temps].erase = false;
857 ++used_temps;
858 }
859 }
860
861 #ifdef USE_STL_SORT
862 std::sort(reg_access, reg_access + used_temps);
863 #else
864 std::qsort(reg_access, used_temps, sizeof(access_record), access_record_compare);
865 #endif
866
867 access_record *trgt = reg_access;
868 access_record *reg_access_end = reg_access + used_temps;
869 access_record *first_erase = reg_access_end;
870 access_record *search_start = trgt + 1;
871
872 while (trgt != reg_access_end) {
873 access_record *src = find_next_rename(search_start, reg_access_end,
874 trgt->end);
875 if (src != reg_access_end) {
876 result[src->reg].new_reg = trgt->reg;
877 result[src->reg].valid = true;
878 trgt->end = src->end;
879
880 /* Since we only search forward, don't remove the renamed
881 * register just now, only mark it. */
882 src->erase = true;
883
884 if (first_erase == reg_access_end)
885 first_erase = src;
886
887 search_start = src + 1;
888 } else {
889 /* Moving to the next target register it is time to remove
890 * the already merged registers from the search range */
891 if (first_erase != reg_access_end) {
892 access_record *outp = first_erase;
893 access_record *inp = first_erase + 1;
894
895 while (inp != reg_access_end) {
896 if (!inp->erase)
897 *outp++ = *inp;
898 ++inp;
899 }
900
901 reg_access_end = outp;
902 first_erase = reg_access_end;
903 }
904 ++trgt;
905 search_start = trgt + 1;
906 }
907 }
908 ralloc_free(reg_access);
909 }
910
911 /* Code below used for debugging */
912 #ifndef NDEBUG
913 static const char swizzle_txt[] = "xyzw";
914
915 static const char *tgsi_file_names[PROGRAM_FILE_MAX] = {
916 "TEMP", "ARRAY", "IN", "OUT", "STATE", "CONST",
917 "UNIFORM", "WO", "ADDR", "SAMPLER", "SV", "UNDEF",
918 "IMM", "BUF", "MEM", "IMAGE"
919 };
920
921 static
922 void dump_instruction(int line, prog_scope *scope,
923 const glsl_to_tgsi_instruction& inst)
924 {
925 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst.op);
926
927 int indent = scope->nesting_depth();
928 if ((scope->type() == switch_case_branch ||
929 scope->type() == switch_default_branch) &&
930 (info->opcode == TGSI_OPCODE_CASE ||
931 info->opcode == TGSI_OPCODE_DEFAULT))
932 --indent;
933
934 if (info->opcode == TGSI_OPCODE_ENDIF ||
935 info->opcode == TGSI_OPCODE_ELSE ||
936 info->opcode == TGSI_OPCODE_ENDLOOP ||
937 info->opcode == TGSI_OPCODE_ENDSWITCH)
938 --indent;
939
940 cerr << setw(4) << line << ": ";
941 for (int i = 0; i < indent; ++i)
942 cerr << " ";
943 cerr << tgsi_get_opcode_name(info->opcode) << " ";
944
945 bool has_operators = false;
946 for (unsigned j = 0; j < num_inst_dst_regs(&inst); j++) {
947 has_operators = true;
948 if (j > 0)
949 cerr << ", ";
950
951 const st_dst_reg& dst = inst.dst[j];
952 cerr << tgsi_file_names[dst.file];
953
954 if (dst.file == PROGRAM_ARRAY)
955 cerr << "(" << dst.array_id << ")";
956
957 cerr << "[" << dst.index << "]";
958
959 if (dst.writemask != TGSI_WRITEMASK_XYZW) {
960 cerr << ".";
961 if (dst.writemask & TGSI_WRITEMASK_X) cerr << "x";
962 if (dst.writemask & TGSI_WRITEMASK_Y) cerr << "y";
963 if (dst.writemask & TGSI_WRITEMASK_Z) cerr << "z";
964 if (dst.writemask & TGSI_WRITEMASK_W) cerr << "w";
965 }
966 }
967 if (has_operators)
968 cerr << " := ";
969
970 for (unsigned j = 0; j < num_inst_src_regs(&inst); j++) {
971 if (j > 0)
972 cerr << ", ";
973
974 const st_src_reg& src = inst.src[j];
975 cerr << tgsi_file_names[src.file]
976 << "[" << src.index << "]";
977 if (src.swizzle != SWIZZLE_XYZW) {
978 cerr << ".";
979 for (int idx = 0; idx < 4; ++idx) {
980 int swz = GET_SWZ(src.swizzle, idx);
981 if (swz < 4) {
982 cerr << swizzle_txt[swz];
983 }
984 }
985 }
986 }
987
988 if (inst.tex_offset_num_offset > 0) {
989 cerr << ", TEXOFS: ";
990 for (unsigned j = 0; j < inst.tex_offset_num_offset; j++) {
991 if (j > 0)
992 cerr << ", ";
993
994 const st_src_reg& src = inst.tex_offsets[j];
995 cerr << tgsi_file_names[src.file]
996 << "[" << src.index << "]";
997 if (src.swizzle != SWIZZLE_XYZW) {
998 cerr << ".";
999 for (int idx = 0; idx < 4; ++idx) {
1000 int swz = GET_SWZ(src.swizzle, idx);
1001 if (swz < 4) {
1002 cerr << swizzle_txt[swz];
1003 }
1004 }
1005 }
1006 }
1007 }
1008 cerr << "\n";
1009 }
1010 #endif