f0e41f5863774fcda77c40d5cc0173b6af07eff1
[mesa.git] / src / gallium / drivers / r600 / sb / sb_sched.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define PSC_DEBUG 0
28
29 #if PSC_DEBUG
30 #define PSC_DUMP(a) do { a } while (0)
31 #else
32 #define PSC_DUMP(a)
33 #endif
34
35 #include "sb_bc.h"
36 #include "sb_shader.h"
37 #include "sb_pass.h"
38 #include "sb_sched.h"
39
40 namespace r600_sb {
41
// Tracks kcache line ("const pair") reservations while an ALU group is
// being formed: rp[] holds the reserved set ids, uc[] their use counts.
rp_kcache_tracker::rp_kcache_tracker(shader &sh) : rp(), uc(),
		// FIXME: for now we'll use "two const pairs" limit for r600, same as
		// for other chips, otherwise additional check in alu_group_tracker is
		// required to make sure that all 4 consts in the group fit into 2
		// kcache sets
		sel_count(2) {}
48
49 bool rp_kcache_tracker::try_reserve(sel_chan r) {
50 unsigned sel = kc_sel(r);
51
52 for (unsigned i = 0; i < sel_count; ++i) {
53 if (rp[i] == 0) {
54 rp[i] = sel;
55 ++uc[i];
56 return true;
57 }
58 if (rp[i] == sel) {
59 ++uc[i];
60 return true;
61 }
62 }
63 return false;
64 }
65
// Try to reserve kcache lines for all kcache sources of n.
// On failure every reservation made here is rolled back and false is
// returned, leaving the tracker state unchanged.
bool rp_kcache_tracker::try_reserve(node* n) {
	bool need_unreserve = false;
	vvec::iterator I(n->src.begin()), E(n->src.end());

	for (; I != E; ++I) {
		value *v = *I;
		if (v->is_kcache()) {
			if (!try_reserve(v->select))
				break;
			else
				need_unreserve = true;
		}
	}
	if (I == E)
		return true;

	// Rollback: I points at the source that failed (it was NOT reserved),
	// so step backwards and unreserve everything before it.
	if (need_unreserve && I != n->src.begin()) {
		do {
			--I;
			value *v =*I;
			if (v->is_kcache())
				unreserve(v->select);
		} while (I != n->src.begin());
	}
	return false;
}
92
93 inline
94 void rp_kcache_tracker::unreserve(node* n) {
95 vvec::iterator I(n->src.begin()), E(n->src.end());
96 for (; I != E; ++I) {
97 value *v = *I;
98 if (v->is_kcache())
99 unreserve(v->select);
100 }
101 }
102
103 void rp_kcache_tracker::unreserve(sel_chan r) {
104 unsigned sel = kc_sel(r);
105
106 for (unsigned i = 0; i < sel_count; ++i)
107 if (rp[i] == sel) {
108 if (--uc[i] == 0)
109 rp[i] = 0;
110 return;
111 }
112 assert(0);
113 return;
114 }
115
// Try to reserve literal slots for all literal sources of n.
// Rolls back and returns false if they don't all fit into the group.
bool literal_tracker::try_reserve(alu_node* n) {
	bool need_unreserve = false;

	vvec::iterator I(n->src.begin()), E(n->src.end());

	for (; I != E; ++I) {
		value *v = *I;
		if (v->is_literal()) {
			if (!try_reserve(v->literal_value))
				break;
			else
				need_unreserve = true;
		}
	}
	if (I == E)
		return true;

	// Rollback: I points at the literal that failed (not reserved), so
	// step backwards and unreserve everything before it.
	if (need_unreserve && I != n->src.begin()) {
		do {
			--I;
			value *v =*I;
			if (v->is_literal())
				unreserve(v->literal_value);
		} while (I != n->src.begin());
	}
	return false;
}
143
144 void literal_tracker::unreserve(alu_node* n) {
145 unsigned nsrc = n->bc.op_ptr->src_count, i;
146
147 for (i = 0; i < nsrc; ++i) {
148 value *v = n->src[i];
149 if (v->is_literal())
150 unreserve(v->literal_value);
151 }
152 }
153
154 bool literal_tracker::try_reserve(literal l) {
155
156 PSC_DUMP( sblog << "literal reserve " << l.u << " " << l.f << "\n"; );
157
158 for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
159 if (lt[i] == 0) {
160 lt[i] = l;
161 ++uc[i];
162 PSC_DUMP( sblog << " reserved new uc = " << uc[i] << "\n"; );
163 return true;
164 } else if (lt[i] == l) {
165 ++uc[i];
166 PSC_DUMP( sblog << " reserved uc = " << uc[i] << "\n"; );
167 return true;
168 }
169 }
170 PSC_DUMP( sblog << " failed to reserve literal\n"; );
171 return false;
172 }
173
174 void literal_tracker::unreserve(literal l) {
175
176 PSC_DUMP( sblog << "literal unreserve " << l.u << " " << l.f << "\n"; );
177
178 for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
179 if (lt[i] == l) {
180 if (--uc[i] == 0)
181 lt[i] = 0;
182 return;
183 }
184 }
185 assert(0);
186 return;
187 }
188
189 static inline unsigned bs_cycle_vector(unsigned bs, unsigned src) {
190 static const unsigned swz[VEC_NUM][3] = {
191 {0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0}
192 };
193 assert(bs < VEC_NUM && src < 3);
194 return swz[bs][src];
195 }
196
197 static inline unsigned bs_cycle_scalar(unsigned bs, unsigned src) {
198 static const unsigned swz[SCL_NUM][3] = {
199 {2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1}
200 };
201
202 if (bs >= SCL_NUM || src >= 3) {
203 // this prevents gcc warning "array subscript is above array bounds"
204 // AFAICS we should never hit this path
205 abort();
206 }
207 return swz[bs][src];
208 }
209
210 static inline unsigned bs_cycle(bool trans, unsigned bs, unsigned src) {
211 return trans ? bs_cycle_scalar(bs, src) : bs_cycle_vector(bs, src);
212 }
213
214 inline
215 bool rp_gpr_tracker::try_reserve(unsigned cycle, unsigned sel, unsigned chan) {
216 ++sel;
217 if (rp[cycle][chan] == 0) {
218 rp[cycle][chan] = sel;
219 ++uc[cycle][chan];
220 return true;
221 } else if (rp[cycle][chan] == sel) {
222 ++uc[cycle][chan];
223 return true;
224 }
225 return false;
226 }
227
inline
void rp_gpr_tracker::unreserve(alu_node* n) {
	// Release the GPR read ports reserved for n's sources.
	unsigned nsrc = n->bc.op_ptr->src_count, i;
	unsigned trans = n->bc.slot == SLOT_TRANS;
	unsigned bs = n->bc.bank_swizzle;
	// NOTE(review): try_reserve() detects the shared-port optimization by
	// comparing value pointers (n->src[0] == n->src[1]); here the bytecode
	// sel/chan pair is compared instead - assumed equivalent once the group
	// tracker has assigned sel/chan. Confirm these can never diverge.
	unsigned opt = !trans
			&& n->bc.src[0].sel == n->bc.src[1].sel
			&& n->bc.src[0].chan == n->bc.src[1].chan;

	for (i = 0; i < nsrc; ++i) {
		value *v = n->src[i];
		if (v->is_readonly())
			continue;	// consts don't consume GPR read ports
		if (i == 1 && opt)
			continue;	// second use of the same register shares the port
		unsigned cycle = bs_cycle(trans, bs, i);
		unreserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan);
	}
}
247
248 inline
249 void rp_gpr_tracker::unreserve(unsigned cycle, unsigned sel, unsigned chan) {
250 ++sel;
251 assert(rp[cycle][chan] == sel && uc[cycle][chan]);
252 if (--uc[cycle][chan] == 0)
253 rp[cycle][chan] = 0;
254 }
255
inline
bool rp_gpr_tracker::try_reserve(alu_node* n) {
	// Try to reserve GPR read ports for all sources of n using its current
	// bank swizzle; rolls everything back and returns false on conflict.
	unsigned nsrc = n->bc.op_ptr->src_count, i;
	unsigned trans = n->bc.slot == SLOT_TRANS;
	unsigned bs = n->bc.bank_swizzle;
	// duplicate gpr source - only one read port is needed for both uses
	unsigned opt = !trans && nsrc >= 2 &&
			n->src[0] == n->src[1];

	bool need_unreserve = false;
	unsigned const_count = 0, min_gpr_cycle = 3;

	for (i = 0; i < nsrc; ++i) {
		value *v = n->src[i];
		if (v->is_readonly()) {
			const_count++;
			// trans slot can read at most 2 const operands
			if (trans && const_count == 3)
				break;
		} else {
			if (i == 1 && opt)
				continue;

			unsigned cycle = bs_cycle(trans, bs, i);

			if (trans && cycle < min_gpr_cycle)
				min_gpr_cycle = cycle;

			// for trans, const reads occupy the early cycles, so a gpr
			// read can't land on a cycle below the const count
			if (const_count && cycle < const_count && trans)
				break;

			if (!try_reserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan))
				break;
			else
				need_unreserve = true;
		}
	}

	// success only if all sources were processed and the gpr cycles don't
	// clash with the const read cycles
	if ((i == nsrc) && (min_gpr_cycle + 1 > const_count))
		return true;

	// rollback: i indexes the source that failed (not reserved), so start
	// from the previous one; i-- both tests and steps back
	if (need_unreserve && i--) {
		do {
			value *v = n->src[i];
			if (!v->is_readonly()) {
				if (i == 1 && opt)
					continue;
				unreserve(bs_cycle(trans, bs, i), n->bc.src[i].sel,
						n->bc.src[i].chan);
			}
		} while (i--);
	}
	return false;
}
308
// Tracks the state of the ALU group currently being formed: occupied
// slots plus the read-port / kcache / literal reservations and the
// group-wide instruction flags.
alu_group_tracker::alu_group_tracker(shader &sh)
	: sh(sh), kc(sh),
	  gpr(), lt(), slots(),
	  max_slots(sh.get_ctx().is_cayman() ? 4 : 5),
	  has_mova(), uses_ar(), has_predset(), has_kill(),
	  updates_exec_mask(), chan_count(), interp_param(), next_id() {

	// 5 slots (xyzw + trans) when the chip has a trans unit, 4 otherwise
	available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
}
318
319 inline
320 sel_chan alu_group_tracker::get_value_id(value* v) {
321 unsigned &id = vmap[v];
322 if (!id)
323 id = ++next_id;
324 return sel_chan(id, v->get_final_chan());
325 }
326
327 inline
328 void alu_group_tracker::assign_slot(unsigned slot, alu_node* n) {
329 update_flags(n);
330 slots[slot] = n;
331 available_slots &= ~(1 << slot);
332
333 unsigned param = n->interp_param();
334
335 if (param) {
336 assert(!interp_param || interp_param == param);
337 interp_param = param;
338 }
339 }
340
341
342 void alu_group_tracker::discard_all_slots(container_node &removed_nodes) {
343 PSC_DUMP( sblog << "agt::discard_all_slots\n"; );
344 discard_slots(~available_slots & ((1 << max_slots) - 1), removed_nodes);
345 }
346
// Remove the instructions selected by slot_mask from the group, handing
// them back through removed_nodes; packed (multislot) ops overlapping the
// mask are removed as a whole.
void alu_group_tracker::discard_slots(unsigned slot_mask,
		container_node &removed_nodes) {

	PSC_DUMP(
		sblog << "discard_slots : packed_ops : "
			<< (unsigned)packed_ops.size() << "\n";
	);

	// First drop any packed op that overlaps the mask (N is kept because
	// erase() invalidates I).
	for (node_vec::iterator N, I = packed_ops.begin();
			I != packed_ops.end(); I = N) {
		N = I; ++N;

		alu_packed_node *n = static_cast<alu_packed_node*>(*I);
		unsigned pslots = n->get_slot_mask();

		PSC_DUMP(
			sblog << "discard_slots : packed slot_mask : " << pslots << "\n";
		);

		if (pslots & slot_mask) {

			PSC_DUMP(
				sblog << "discard_slots : discarding packed...\n";
			);

			removed_nodes.push_back(n);
			slot_mask &= ~pslots;	// these slots are handled now
			N = packed_ops.erase(I);
			available_slots |= pslots;
			for (unsigned k = 0; k < max_slots; ++k) {
				if (pslots & (1 << k))
					slots[k] = NULL;
			}
		}
	}

	// Then drop the remaining (single-slot) instructions from the mask.
	for (unsigned slot = 0; slot < max_slots; ++slot) {
		unsigned slot_bit = 1 << slot;

		if (slot_mask & slot_bit) {
			assert(!(available_slots & slot_bit));
			assert(slots[slot]);

			// 4-slot ops must have been handled as packed ops above
			assert(!(slots[slot]->bc.slot_flags & AF_4SLOT));

			PSC_DUMP(
				sblog << "discarding slot " << slot << " : ";
				dump::dump_op(slots[slot]);
				sblog << "\n";
			);

			removed_nodes.push_back(slots[slot]);
			slots[slot] = NULL;
			available_slots |= slot_bit;
		}
	}

	// If a vector-capable instruction is left in the trans slot while its
	// preferred vector slot (dst chan) became free, move it there.
	alu_node *t = slots[4];
	if (t && (t->bc.slot_flags & AF_V)) {
		unsigned chan = t->bc.dst_chan;
		if (!slots[chan]) {
			PSC_DUMP(
				sblog << "moving ";
				dump::dump_op(t);
				sblog << " from trans slot to free slot " << chan << "\n";
			);

			slots[chan] = t;
			slots[4] = NULL;
			t->bc.slot = chan;
		}
	}

	// Rebuild all reservation state from the remaining slots.
	reinit();
}
422
423 alu_group_node* alu_group_tracker::emit() {
424
425 alu_group_node *g = sh.create_alu_group();
426
427 lt.init_group_literals(g);
428
429 for (unsigned i = 0; i < max_slots; ++i) {
430 alu_node *n = slots[i];
431 if (n) {
432 g->push_back(n);
433 }
434 }
435 return g;
436 }
437
438 bool alu_group_tracker::try_reserve(alu_node* n) {
439 unsigned nsrc = n->bc.op_ptr->src_count;
440 unsigned slot = n->bc.slot;
441 bool trans = slot == 4;
442
443 if (slots[slot])
444 return false;
445
446 unsigned flags = n->bc.op_ptr->flags;
447
448 unsigned param = n->interp_param();
449
450 if (param && interp_param && interp_param != param)
451 return false;
452
453 if ((flags & AF_KILL) && has_predset)
454 return false;
455 if ((flags & AF_ANY_PRED) && (has_kill || has_predset))
456 return false;
457 if ((flags & AF_MOVA) && (has_mova || uses_ar))
458 return false;
459
460 if (n->uses_ar() && has_mova)
461 return false;
462
463 for (unsigned i = 0; i < nsrc; ++i) {
464
465 unsigned last_id = next_id;
466
467 value *v = n->src[i];
468 if (!v->is_any_gpr() && !v->is_rel())
469 continue;
470 sel_chan vid = get_value_id(n->src[i]);
471
472 if (vid > last_id && chan_count[vid.chan()] == 3) {
473 return false;
474 }
475
476 n->bc.src[i].sel = vid.sel();
477 n->bc.src[i].chan = vid.chan();
478 }
479
480 if (!lt.try_reserve(n))
481 return false;
482
483 if (!kc.try_reserve(n)) {
484 lt.unreserve(n);
485 return false;
486 }
487
488 unsigned fbs = n->forced_bank_swizzle();
489
490 n->bc.bank_swizzle = 0;
491
492 if (!trans & fbs)
493 n->bc.bank_swizzle = VEC_210;
494
495 if (gpr.try_reserve(n)) {
496 assign_slot(slot, n);
497 return true;
498 }
499
500 if (!fbs) {
501 unsigned swz_num = trans ? SCL_NUM : VEC_NUM;
502 for (unsigned bs = 0; bs < swz_num; ++bs) {
503 n->bc.bank_swizzle = bs;
504 if (gpr.try_reserve(n)) {
505 assign_slot(slot, n);
506 return true;
507 }
508 }
509 }
510
511 gpr.reset();
512
513 slots[slot] = n;
514 unsigned forced_swz_slots = 0;
515 int first_slot = ~0, first_nf = ~0, last_slot = ~0;
516 unsigned save_bs[5];
517
518 for (unsigned i = 0; i < max_slots; ++i) {
519 alu_node *a = slots[i];
520 if (a) {
521 if (first_slot == ~0)
522 first_slot = i;
523 last_slot = i;
524 save_bs[i] = a->bc.bank_swizzle;
525 if (a->forced_bank_swizzle()) {
526 assert(i != SLOT_TRANS);
527 forced_swz_slots |= (1 << i);
528 a->bc.bank_swizzle = VEC_210;
529 if (!gpr.try_reserve(a))
530 assert("!internal reservation error");
531 } else {
532 if (first_nf == ~0)
533 first_nf = i;
534
535 a->bc.bank_swizzle = 0;
536 }
537 }
538 }
539
540 if (first_nf == ~0) {
541 assign_slot(slot, n);
542 return true;
543 }
544
545 assert(first_slot != ~0 && last_slot != ~0);
546
547 // silence "array subscript is above array bounds" with gcc 4.8
548 if (last_slot >= 5)
549 abort();
550
551 int i = first_nf;
552 alu_node *a = slots[i];
553 bool backtrack = false;
554
555 while (1) {
556
557 PSC_DUMP(
558 sblog << " bs: trying s" << i << " bs:" << a->bc.bank_swizzle
559 << " bt:" << backtrack << "\n";
560 );
561
562 if (!backtrack && gpr.try_reserve(a)) {
563 PSC_DUMP(
564 sblog << " bs: reserved s" << i << " bs:" << a->bc.bank_swizzle
565 << "\n";
566 );
567
568 while ((++i <= last_slot) && !slots[i]);
569 if (i <= last_slot)
570 a = slots[i];
571 else
572 break;
573 } else {
574 bool itrans = i == SLOT_TRANS;
575 unsigned max_swz = itrans ? SCL_221 : VEC_210;
576
577 if (a->bc.bank_swizzle < max_swz) {
578 ++a->bc.bank_swizzle;
579
580 PSC_DUMP(
581 sblog << " bs: inc s" << i << " bs:" << a->bc.bank_swizzle
582 << "\n";
583 );
584
585 } else {
586
587 a->bc.bank_swizzle = 0;
588 while ((--i >= first_nf) && !slots[i]);
589 if (i < first_nf)
590 break;
591 a = slots[i];
592 PSC_DUMP(
593 sblog << " bs: unreserve s" << i << " bs:" << a->bc.bank_swizzle
594 << "\n";
595 );
596 gpr.unreserve(a);
597 backtrack = true;
598
599 continue;
600 }
601 }
602 backtrack = false;
603 }
604
605 if (i == last_slot + 1) {
606 assign_slot(slot, n);
607 return true;
608 }
609
610 // reservation failed, restore previous state
611 slots[slot] = NULL;
612 gpr.reset();
613 for (unsigned i = 0; i < max_slots; ++i) {
614 alu_node *a = slots[i];
615 if (a) {
616 a->bc.bank_swizzle = save_bs[i];
617 bool b = gpr.try_reserve(a);
618 assert(b);
619 }
620 }
621
622 kc.unreserve(n);
623 lt.unreserve(n);
624 return false;
625 }
626
// Try to reserve all slots of a packed (multislot) op; every component
// must fit, otherwise everything is rolled back via reinit().
bool alu_group_tracker::try_reserve(alu_packed_node* p) {
	bool need_unreserve = false;
	node_iterator I(p->begin()), E(p->end());

	for (; I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		if (!try_reserve(n))
			break;
		else
			need_unreserve = true;
	}

	if (I == E) {
		packed_ops.push_back(p);
		return true;
	}

	if (need_unreserve) {
		// clear the successfully reserved slots (everything before the
		// failed component) and rebuild the tracker state from scratch
		while (--I != E) {
			alu_node *n = static_cast<alu_node*>(*I);
			slots[n->bc.slot] = NULL;
		}
		reinit();
	}
	return false;
}
653
654 void alu_group_tracker::reinit() {
655 alu_node * s[5];
656 memcpy(s, slots, sizeof(slots));
657
658 reset(true);
659
660 for (int i = max_slots - 1; i >= 0; --i) {
661 if (s[i] && !try_reserve(s[i])) {
662 sblog << "alu_group_tracker: reinit error on slot " << i << "\n";
663 for (unsigned i = 0; i < max_slots; ++i) {
664 sblog << " slot " << i << " : ";
665 if (s[i])
666 dump::dump_op(s[i]);
667
668 sblog << "\n";
669 }
670 assert(!"alu_group_tracker: reinit error");
671 }
672 }
673 }
674
675 void alu_group_tracker::reset(bool keep_packed) {
676 kc.reset();
677 gpr.reset();
678 lt.reset();
679 memset(slots, 0, sizeof(slots));
680 vmap.clear();
681 next_id = 0;
682 has_mova = false;
683 uses_ar = false;
684 has_predset = false;
685 has_kill = false;
686 updates_exec_mask = false;
687 available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
688 interp_param = 0;
689
690 chan_count[0] = 0;
691 chan_count[1] = 0;
692 chan_count[2] = 0;
693 chan_count[3] = 0;
694
695 if (!keep_packed)
696 packed_ops.clear();
697 }
698
699 void alu_group_tracker::update_flags(alu_node* n) {
700 unsigned flags = n->bc.op_ptr->flags;
701 has_kill |= (flags & AF_KILL);
702 has_mova |= (flags & AF_MOVA);
703 has_predset |= (flags & AF_ANY_PRED);
704 uses_ar |= n->uses_ar();
705
706 if (flags & AF_ANY_PRED) {
707 if (n->dst[2] != NULL)
708 updates_exec_mask = true;
709 }
710 }
711
// Entry point of the post-scheduling pass: schedule the whole shader.
int post_scheduler::run() {
	run_on(sh.root);
	return 0;
}
716
717 void post_scheduler::run_on(container_node* n) {
718
719 for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
720 if (I->is_container()) {
721 if (I->subtype == NST_BB) {
722 bb_node* bb = static_cast<bb_node*>(*I);
723 schedule_bb(bb);
724 } else {
725 run_on(static_cast<container_node*>(*I));
726 }
727 }
728 }
729 }
730
731 void post_scheduler::init_uc_val(container_node *c, value *v) {
732 node *d = v->any_def();
733 if (d && d->parent == c)
734 ++ucm[d];
735 }
736
// Accumulate use counts (ucm) for all values in vv defined inside c.
void post_scheduler::init_uc_vec(container_node *c, vvec &vv, bool src) {
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v || v->is_readonly())
			continue;

		if (v->is_rel()) {
			init_uc_val(c, v->rel);
			init_uc_vec(c, v->muse, true);
		} if (src) {	// NOTE(review): no "else" here - a rel value that is
				// also a src gets counted by both branches; looks
				// intentional but confirm it isn't a missing "else"
			init_uc_val(c, v);
		}
	}
}
751
752 unsigned post_scheduler::init_ucm(container_node *c, node *n) {
753 init_uc_vec(c, n->src, true);
754 init_uc_vec(c, n->dst, false);
755
756 uc_map::iterator F = ucm.find(n);
757 return F == ucm.end() ? 0 : F->second;
758 }
759
// Schedule one basic block: pull nodes from the tail of the block,
// post-schedule ALU clauses and push everything else back unchanged.
void post_scheduler::schedule_bb(bb_node* bb) {
	PSC_DUMP(
		sblog << "scheduling BB " << bb->id << "\n";
		if (!pending.empty())
			dump::dump_op_list(&pending);
	);

	assert(pending.empty());
	assert(bb_pending.empty());
	assert(ready.empty());

	bb_pending.append_from(bb);
	cur_bb = bb;

	node *n;

	// process the block bottom-up
	while ((n = bb_pending.back())) {

		PSC_DUMP(
			sblog << "post_sched_bb ";
			dump::dump_op(n);
			sblog << "\n";
		);

		if (n->subtype == NST_ALU_CLAUSE) {
			n->remove();
			process_alu(static_cast<container_node*>(n));
			continue;
		}

		// non-ALU nodes are moved back to the block as-is
		n->remove();
		bb->push_front(n);
	}

	this->cur_bb = NULL;
}
796
797 void post_scheduler::init_regmap() {
798
799 regmap.clear();
800
801 PSC_DUMP(
802 sblog << "init_regmap: live: ";
803 dump::dump_set(sh, live);
804 sblog << "\n";
805 );
806
807 for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
808 value *v = *I;
809 assert(v);
810 if (!v->is_sgpr() || !v->is_prealloc())
811 continue;
812
813 sel_chan r = v->gpr;
814
815 PSC_DUMP(
816 sblog << "init_regmap: " << r << " <= ";
817 dump::dump_val(v);
818 sblog << "\n";
819 );
820
821 assert(r);
822 regmap[r] = v;
823 }
824 }
825
// Post-schedule one ALU clause: compute local use counts, release the ops
// with no remaining in-clause uses and run the group scheduler.
void post_scheduler::process_alu(container_node *c) {

	ucm.clear();
	alu.reset();

	live = c->live_after;

	init_globals(c->live_after, true);
	init_globals(c->live_before, true);

	init_regmap();

	update_local_interferences();

	// bottom-up walk; N is computed up-front because the body unlinks I
	for (node_riterator N, I = c->rbegin(), E = c->rend(); I != E; I = N) {
		N = I;
		++N;

		node *n = *I;
		unsigned uc = init_ucm(c, n);

		PSC_DUMP(
			sblog << "process_alu uc=" << uc << " ";
			dump::dump_op(n);
			sblog << " ";
		);

		if (uc) {
			// still has users inside the clause - not schedulable yet
			n->remove();
			pending.push_back(n);
			PSC_DUMP( sblog << "pending\n"; );
		} else {
			release_op(n);
		}
	}

	schedule_alu(c);
}
864
865 void post_scheduler::update_local_interferences() {
866
867 PSC_DUMP(
868 sblog << "update_local_interferences : ";
869 dump::dump_set(sh, live);
870 sblog << "\n";
871 );
872
873
874 for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
875 value *v = *I;
876 if (v->is_prealloc())
877 continue;
878
879 v->interferences.add_set(live);
880 }
881 }
882
// Add the used values in vv to the live set (we walk bottom-up, so a use
// starts a live range); values that become live here are also reported
// through 'born'.
void post_scheduler::update_live_src_vec(vvec &vv, val_set *born, bool src) {
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;

		if (!v)
			continue;

		if (src && v->is_any_gpr()) {
			if (live.add_val(v)) {
				if (!v->is_prealloc()) {
					if (!cleared_interf.contains(v)) {
						// the value just became live - its previously
						// recorded interferences no longer apply
						PSC_DUMP(
							sblog << "clearing interferences for " << *v << "\n";
						);
						v->interferences.clear();
						cleared_interf.add_val(v);
					}
				}
				if (born)
					born->add_val(v);
			}
		} else if (v->is_rel()) {
			// NOTE(review): only a non-gpr rel address (e.g. AR) is added
			// to live here; gpr rel addresses are presumably covered via
			// muse below - confirm the negation is intended
			if (!v->rel->is_any_gpr())
				live.add_val(v->rel);
			update_live_src_vec(v->muse, born, true);
		}
	}
}
911
912 void post_scheduler::update_live_dst_vec(vvec &vv) {
913 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
914 value *v = *I;
915 if (!v)
916 continue;
917
918 if (v->is_rel()) {
919 update_live_dst_vec(v->mdef);
920 } else if (v->is_any_gpr()) {
921 if (!live.remove_val(v)) {
922 PSC_DUMP(
923 sblog << "failed to remove ";
924 dump::dump_val(v);
925 sblog << " from live : ";
926 dump::dump_set(sh, live);
927 sblog << "\n";
928 );
929 }
930 }
931 }
932 }
933
934 void post_scheduler::update_live(node *n, val_set *born) {
935 update_live_dst_vec(n->dst);
936 update_live_src_vec(n->src, born, true);
937 update_live_src_vec(n->dst, born, false);
938 }
939
// Finalize the current ALU group: recolor local defs, update liveness and
// interference information, then release source values for scheduling.
void post_scheduler::process_group() {
	alu_group_tracker &rt = alu.grp();

	val_set vals_born;

	recolor_locals();

	PSC_DUMP(
		sblog << "process_group: live_before : ";
		dump::dump_set(sh, live);
		sblog << "\n";
	);

	for (unsigned s = 0; s < ctx.num_slots; ++s) {
		alu_node *n = rt.slot(s);
		if (!n)
			continue;

		update_live(n, &vals_born);
	}

	PSC_DUMP(
		sblog << "process_group: live_after : ";
		dump::dump_set(sh, live);
		sblog << "\n";
	);

	update_local_interferences();

	for (unsigned i = 0; i < 5; ++i) {
		node *n = rt.slot(i);
		// MOVA sources are not released here (handled with the AR use)
		if (n && !n->is_mova()) {
			release_src_values(n);
		}
	}
}
976
977 void post_scheduler::init_globals(val_set &s, bool prealloc) {
978
979 PSC_DUMP(
980 sblog << "init_globals: ";
981 dump::dump_set(sh, s);
982 sblog << "\n";
983 );
984
985 for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
986 value *v = *I;
987 if (v->is_sgpr() && !v->is_global()) {
988 v->set_global();
989
990 if (prealloc && v->is_fixed()) {
991 v->set_prealloc();
992 }
993 }
994 }
995 }
996
997 void post_scheduler::emit_clause() {
998
999 if (alu.current_ar) {
1000 emit_load_ar();
1001 process_group();
1002 alu.emit_group();
1003 }
1004
1005 alu.emit_clause(cur_bb);
1006 }
1007
// Main group-scheduling loop for one ALU clause: form groups until
// nothing is left, splitting into multiple clauses on hardware limits.
void post_scheduler::schedule_alu(container_node *c) {

	assert(!ready.empty() || !ready_copies.empty());

	while (1) {

		// snapshot the register map so a failed group can be rolled back
		prev_regmap = regmap;

		if (!prepare_alu_group()) {
			if (alu.current_ar) {
				// the loaded AR value may be the blocker - reload and retry
				emit_load_ar();
				continue;
			} else
				break;
		}

		if (!alu.check_clause_limits()) {
			// clause is full - flush it and retry the same group
			regmap = prev_regmap;
			emit_clause();
			init_globals(live, false);
			continue;
		}

		process_group();
		alu.emit_group();
	};

	if (!alu.is_empty()) {
		emit_clause();
	}

	// everything must have been scheduled by now
	if (!ready.empty()) {
		sblog << "##post_scheduler: unscheduled ready instructions :";
		dump::dump_op_list(&ready);
		assert(!"unscheduled ready instructions");
	}

	if (!pending.empty()) {
		sblog << "##post_scheduler: unscheduled pending instructions :";
		dump::dump_op_list(&pending);
		assert(!"unscheduled pending instructions");
	}
}
1051
// Mark in rb the registers (on v's channel) already taken by values in vs
// that interfere with v, so recolor_local() can avoid them.
void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {
	unsigned chan = v->gpr.chan();

	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh);
			I != E; ++I) {
		value *vi = *I;
		sel_chan gpr = vi->get_final_gpr();

		// only fixed gpr values on the same channel (and outside v's own
		// chunk) actually constrain the choice
		if (vi->is_any_gpr() && gpr && vi != v &&
				(!v->chunk || v->chunk != vi->chunk) &&
				vi->is_fixed() && gpr.chan() == chan) {

			unsigned r = gpr.sel();

			PSC_DUMP(
				sblog << "\tadd_interferences: " << *vi << "\n";
			);

			// grow the bitset on demand (with headroom to limit regrowth)
			if (rb.size() <= r)
				rb.resize(r + 32);
			rb.set(r);
		}
	}
}
1076
1077 void post_scheduler::set_color_local_val(value *v, sel_chan color) {
1078 v->gpr = color;
1079
1080 PSC_DUMP(
1081 sblog << " recolored: ";
1082 dump::dump_val(v);
1083 sblog << "\n";
1084 );
1085 }
1086
1087 void post_scheduler::set_color_local(value *v, sel_chan color) {
1088 if (v->chunk) {
1089 vvec &vv = v->chunk->values;
1090 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
1091 value *v2 =*I;
1092 set_color_local_val(v2, color);
1093 }
1094 v->chunk->fix();
1095 } else {
1096 set_color_local_val(v, color);
1097 v->fix();
1098 }
1099 }
1100
// Pick a new register (keeping the channel) for a local scalar value such
// that it doesn't clash with anything it interferes with.
bool post_scheduler::recolor_local(value *v) {

	sb_bitset rb;

	assert(v->is_sgpr());
	assert(!v->is_prealloc());
	assert(v->gpr);

	unsigned chan = v->gpr.chan();

	PSC_DUMP(
		sblog << "recolor_local: ";
		dump::dump_val(v);
		sblog << " interferences: ";
		dump::dump_set(sh, v->interferences);
		sblog << "\n";
		if (v->chunk) {
			sblog << " in chunk: ";
			coalescer::dump_chunk(v->chunk);
			sblog << "\n";
		}
	);

	// collect forbidden registers; for a coalesced value the whole chunk's
	// interferences apply
	if (v->chunk) {
		for (vvec::iterator I = v->chunk->values.begin(),
				E = v->chunk->values.end(); I != E; ++I) {
			value *v2 = *I;

			PSC_DUMP( sblog << " add_interferences for " << *v2 << " :\n"; );

			add_interferences(v, rb, v2->interferences);
		}
	} else {
		add_interferences(v, rb, v->interferences);
	}

	PSC_DUMP(
		unsigned sz = rb.size();
		sblog << "registers bits: " << sz;
		for (unsigned r = 0; r < sz; ++r) {
			if ((r & 7) == 0)
				sblog << "\n " << r << " ";
			sblog << (rb.get(r) ? 1 : 0);
		}
	);

	// pass 0 prefers temp gprs; pass 1 falls back to the non-temp range.
	// global values must not use temp registers, so they skip pass 0.
	bool no_temp_gprs = v->is_global();
	unsigned rs, re, pass = no_temp_gprs ? 1 : 0;

	while (pass < 2) {

		if (pass == 0) {
			rs = sh.first_temp_gpr();
			re = MAX_GPR;
		} else {
			rs = 0;
			re = sh.num_nontemp_gpr();
		}

		for (unsigned reg = rs; reg < re; ++reg) {
			if (reg >= rb.size() || !rb.get(reg)) {
				// color found
				set_color_local(v, sel_chan(reg, chan));
				return true;
			}
		}
		++pass;
	}

	assert(!"recolor_local failed");
	return true;
}
1173
1174 void post_scheduler::emit_load_ar() {
1175
1176 regmap = prev_regmap;
1177 alu.discard_current_group();
1178
1179 alu_group_tracker &rt = alu.grp();
1180 alu_node *a = alu.create_ar_load();
1181
1182 if (!rt.try_reserve(a)) {
1183 sblog << "can't emit AR load : ";
1184 dump::dump_op(a);
1185 sblog << "\n";
1186 }
1187
1188 alu.current_ar = 0;
1189 }
1190
// Remove d's entry from the register map when its def is scheduled
// (bottom-up); returns false when the group must be retried (AR reload).
bool post_scheduler::unmap_dst_val(value *d) {

	if (d == alu.current_ar) {
		// the tracked AR value is defined here - emit its load and retry
		emit_load_ar();
		return false;
	}

	if (d->is_prealloc()) {
		sel_chan gpr = d->get_final_gpr();
		rv_map::iterator F = regmap.find(gpr);
		value *c = NULL;
		if (F != regmap.end())
			c = F->second;

		if (c && c!=d && (!c->chunk || c->chunk != d->chunk)) {
			// the register currently maps to a different value - this
			// schedule would clobber it
			PSC_DUMP(
				sblog << "dst value conflict : ";
				dump::dump_val(d);
				sblog << " regmap contains ";
				dump::dump_val(c);
				sblog << "\n";
			);
			assert(!"scheduler error");
			return false;
		} else if (c) {
			regmap.erase(F);
		}
	}
	return true;
}
1221
// Unmap the destination value(s) of n from the register map; handles AR
// loads and relative (indirect) destinations.
bool post_scheduler::unmap_dst(alu_node *n) {
	value *d = n->dst.empty() ? NULL : n->dst[0];

	if (!d)
		return true;

	if (!d->is_rel()) {
		if (d && d->is_any_reg()) {

			if (d->is_AR()) {
				// an AR load must define the AR value we're tracking
				if (alu.current_ar != d) {
					sblog << "loading wrong ar value\n";
					assert(0);
				} else {
					alu.current_ar = NULL;
				}

			} else if (d->is_any_gpr()) {
				if (!unmap_dst_val(d))
					return false;
			}
		}
	} else {
		// relative destination - unmap every possibly defined value
		for (vvec::iterator I = d->mdef.begin(), E = d->mdef.end();
				I != E; ++I) {
			d = *I;
			if (!d)
				continue;

			assert(d->is_any_gpr());

			if (!unmap_dst_val(d))
				return false;
		}
	}
	return true;
}
1259
1260 bool post_scheduler::map_src_val(value *v) {
1261
1262 if (!v->is_prealloc())
1263 return true;
1264
1265 sel_chan gpr = v->get_final_gpr();
1266 rv_map::iterator F = regmap.find(gpr);
1267 value *c = NULL;
1268 if (F != regmap.end()) {
1269 c = F->second;
1270 if (!v->v_equal(c)) {
1271 PSC_DUMP(
1272 sblog << "can't map src value ";
1273 dump::dump_val(v);
1274 sblog << ", regmap contains ";
1275 dump::dump_val(c);
1276 sblog << "\n";
1277 );
1278 return false;
1279 }
1280 } else {
1281 regmap.insert(std::make_pair(gpr, v));
1282 }
1283 return true;
1284 }
1285
// Map the source values in vv into the register map; also tracks which AR
// value is used for relative addressing. Returns false on a conflict.
bool post_scheduler::map_src_vec(vvec &vv, bool src) {
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		// only fixed gpr values and rel values matter here
		if ((!v->is_any_gpr() || !v->is_fixed()) && !v->is_rel())
			continue;

		if (v->is_rel()) {
			value *rel = v->rel;
			assert(rel);

			if (!rel->is_const()) {
				if (!map_src_vec(v->muse, true))
					return false;

				if (rel != alu.current_ar) {
					if (alu.current_ar) {
						// a different AR value is already loaded - conflict
						PSC_DUMP(
							sblog << " current_AR is " << *alu.current_ar
								<< " trying to use " << *rel << "\n";
						);
						return false;
					}

					alu.current_ar = rel;

					PSC_DUMP(
						sblog << " new current_AR assigned: " << *alu.current_ar
							<< "\n";
					);
				}
			}

		} else if (src) {
			if (!map_src_val(v)) {
				return false;
			}
		}
	}
	return true;
}
1329
1330 bool post_scheduler::map_src(alu_node *n) {
1331 if (!map_src_vec(n->dst, false))
1332 return false;
1333
1334 if (!map_src_vec(n->src, true))
1335 return false;
1336
1337 return true;
1338 }
1339
1340 void post_scheduler::dump_regmap() {
1341
1342 sblog << "# REGMAP :\n";
1343
1344 for(rv_map::iterator I = regmap.begin(), E = regmap.end(); I != E; ++I) {
1345 sblog << " # " << I->first << " => " << *(I->second) << "\n";
1346 }
1347
1348 if (alu.current_ar)
1349 sblog << " current_AR: " << *alu.current_ar << "\n";
1350 if (alu.current_pr)
1351 sblog << " current_PR: " << *alu.current_pr << "\n";
1352 }
1353
1354 void post_scheduler::recolor_locals() {
1355 alu_group_tracker &rt = alu.grp();
1356
1357 for (unsigned s = 0; s < ctx.num_slots; ++s) {
1358 alu_node *n = rt.slot(s);
1359 if (n) {
1360 value *d = n->dst[0];
1361 if (d && d->is_sgpr() && !d->is_prealloc()) {
1362 recolor_local(d);
1363 }
1364 }
1365 }
1366 }
1367
// returns true if there are interferences
bool post_scheduler::check_interferences() {
	// Verify that the group's register usage is consistent with the
	// register map; conflicting slots are discarded and the loop retries
	// until the remaining slots are conflict-free.

	alu_group_tracker &rt = alu.grp();

	unsigned interf_slots;

	bool discarded = false;

	PSC_DUMP(
		sblog << "check_interferences: before: \n";
		dump_regmap();
	);

	do {

		interf_slots = 0;

		// first unmap every dst value of the group
		for (unsigned s = 0; s < ctx.num_slots; ++s) {
			alu_node *n = rt.slot(s);
			if (n) {
				if (!unmap_dst(n)) {
					return true;
				}
			}
		}

		// then map the src values; slots whose sources conflict with the
		// map are collected in interf_slots
		for (unsigned s = 0; s < ctx.num_slots; ++s) {
			alu_node *n = rt.slot(s);
			if (n) {
				if (!map_src(n)) {
					interf_slots |= (1 << s);
				}
			}
		}

		PSC_DUMP(
			for (unsigned i = 0; i < 5; ++i) {
				if (interf_slots & (1 << i)) {
					sblog << "!!!!!! interf slot: " << i << " : ";
					dump::dump_op(rt.slot(i));
					sblog << "\n";
				}
			}
		);

		if (!interf_slots)
			break;

		PSC_DUMP( sblog << "ci: discarding slots " << interf_slots << "\n"; );

		rt.discard_slots(interf_slots, alu.conflict_nodes);
		regmap = prev_regmap;
		discarded = true;

	} while(1);

	PSC_DUMP(
		sblog << "check_interferences: after: \n";
		dump_regmap();
	);

	return discarded;
}
1432
// add instruction(s) (alu_node or contents of alu_packed_node) to current group
// returns the number of added instructions on success, 0 on failure
// (no free/suitable slots, or group tracker reservation failed)
unsigned post_scheduler::try_add_instruction(node *n) {

	alu_group_tracker &rt = alu.grp();

	unsigned avail_slots = rt.avail_slots();

	if (n->is_alu_packed()) {
		// a packed node occupies a fixed set of slots - all of them
		// must still be available in the group
		alu_packed_node *p = static_cast<alu_packed_node*>(n);
		unsigned slots = p->get_slot_mask();
		unsigned cnt = __builtin_popcount(slots);

		if ((slots & avail_slots) != slots) {
			PSC_DUMP( sblog << " no slots \n"; );
			return 0;
		}

		p->update_packed_items(ctx);

		if (!rt.try_reserve(p)) {
			PSC_DUMP( sblog << " reservation failed \n"; );
			return 0;
		}

		p->remove();
		return cnt;

	} else {
		alu_node *a = static_cast<alu_node*>(n);
		value *d = a->dst.empty() ? NULL : a->dst[0];

		// MOVA writes a special register, so slot selection must not be
		// constrained by a dst gpr channel - treat it as having no dst
		if (d && d->is_special_reg()) {
			assert(a->bc.op_ptr->flags & AF_MOVA);
			d = NULL;
		}

		unsigned allowed_slots = ctx.alu_slots_mask(a->bc.op_ptr);
		unsigned slot;

		allowed_slots &= avail_slots;

		if (!allowed_slots)
			return 0;

		if (d) {
			// a vector slot must match the dst channel; the trans slot
			// (bit 0x10) can write any channel
			slot = d->get_final_chan();
			a->bc.dst_chan = slot;
			allowed_slots &= (1 << slot) | 0x10;
		} else {
			if (a->bc.op_ptr->flags & AF_MOVA) {
				if (a->bc.slot_flags & AF_V)
					allowed_slots &= (1 << SLOT_X);
				else
					allowed_slots &= (1 << SLOT_TRANS);
			}
		}

		// FIXME workaround for some problems with MULADD in trans slot on r700,
		// (is it really needed on r600?)
		if (a->bc.op == ALU_OP3_MULADD && !ctx.is_egcm()) {
			allowed_slots &= 0x0F;
		}

		if (!allowed_slots) {
			PSC_DUMP( sblog << " no suitable slots\n"; );
			return 0;
		}

		// pick the lowest allowed slot
		slot = __builtin_ctz(allowed_slots);
		a->bc.slot = slot;

		PSC_DUMP( sblog << "slot: " << slot << "\n"; );

		if (!rt.try_reserve(a)) {
			PSC_DUMP( sblog << " reservation failed\n"; );
			return 0;
		}

		a->remove();
		return 1;
	}
}
1516
// Try to coalesce a copy mov between two scheduler gprs: if src and dst
// end up in the same gpr the copy is a no-op and is removed entirely.
// Returns true when the node was handled here (coalesced, or deferred
// because the dst gpr isn't ready yet), false when it has to be
// scheduled as a normal instruction.
bool post_scheduler::check_copy(node *n) {
	if (!n->is_copy_mov())
		return false;

	value *s = n->src[0];
	value *d = n->dst[0];

	if (!s->is_sgpr() || !d->is_sgpr())
		return false;

	if (!s->is_prealloc()) {
		// try to recolor the source so it can land in the dst gpr
		recolor_local(s);
	}

	if (s->gpr == d->gpr) {

		PSC_DUMP(
			sblog << "check_copy: ";
			dump::dump_op(n);
			sblog << "\n";
		);

		rv_map::iterator F = regmap.find(d->gpr);
		bool gpr_free = (F == regmap.end());

		if (d->is_prealloc()) {
			if (gpr_free) {
				// dst gpr isn't live yet - can't eliminate the copy now
				PSC_DUMP( sblog << " copy not ready...\n";);
				return true;
			}

			value *rv = F->second;
			if (rv != d && (!rv->chunk || rv->chunk != d->chunk)) {
				// the gpr currently holds an unrelated value
				PSC_DUMP( sblog << " copy not ready(2)...\n";);
				return true;
			}

			unmap_dst(static_cast<alu_node*>(n));
		}

		if (s->is_prealloc() && !map_src_val(s))
			return true;

		update_live(n, NULL);

		release_src_values(n);
		n->remove();
		PSC_DUMP( sblog << " copy coalesced...\n";);
		return true;
	}
	return false;
}
1569
1570 void post_scheduler::dump_group(alu_group_tracker &rt) {
1571 for (unsigned i = 0; i < 5; ++i) {
1572 node *n = rt.slot(i);
1573 if (n) {
1574 sblog << "slot " << i << " : ";
1575 dump::dump_op(n);
1576 sblog << "\n";
1577 }
1578 }
1579 }
1580
// Process the ready copy movs: coalesce those that can be eliminated
// (check_copy) and move the rest to the regular ready list. Repeats until
// a full pass leaves the tail of ready_copies unchanged, since coalescing
// one copy may enable another.
void post_scheduler::process_ready_copies() {

	node *last;

	do {
		last = ready_copies.back();

		// iterator N is advanced before the current node may be removed
		for (node_iterator N, I = ready_copies.begin(), E = ready_copies.end();
				I != E; I = N) {
			N = I; ++N;

			node *n = *I;

			if (!check_copy(n)) {
				// not coalescable - schedule it as a normal instruction
				n->remove();
				ready.push_back(n);
			}
		}
	} while (last != ready_copies.back());

	update_local_interferences();
}
1603
1604
// Fill the current alu group with instructions from the ready lists,
// re-checking interferences after each pass. Returns nonzero (the
// instruction count) when at least one instruction was scheduled.
bool post_scheduler::prepare_alu_group() {

	alu_group_tracker &rt = alu.grp();

	unsigned i1 = 0;

	PSC_DUMP(
		sblog << "prepare_alu_group: starting...\n";
		dump_group(rt);
	);

	// give the nodes discarded from the previous group another chance
	ready.append_from(&alu.conflict_nodes);

	// FIXME rework this loop

	do {

		process_ready_copies();

		++i1;

		// iterator N is advanced up front because try_add_instruction
		// removes the node from the list on success
		for (node_iterator N, I = ready.begin(), E = ready.end(); I != E;
				I = N) {
			N = I; ++N;
			node *n = *I;

			PSC_DUMP(
				sblog << "p_a_g: ";
				dump::dump_op(n);
				sblog << "\n";
			);


			unsigned cnt = try_add_instruction(n);

			if (!cnt)
				continue;

			PSC_DUMP(
				sblog << "current group:\n";
				dump_group(rt);
			);

			if (rt.inst_count() == ctx.num_slots) {
				PSC_DUMP( sblog << " all slots used\n"; );
				break;
			}
		}

		// no interferences - the group is final
		if (!check_interferences())
			break;

		// don't try to add more instructions to the group with mova if this
		// can lead to breaking clause slot count limit - we don't want mova to
		// end up in the end of the new clause instead of beginning of the
		// current clause.
		if (rt.has_ar_load() && alu.total_slots() > 121)
			break;

		// safety valve: stop iterating once we have something scheduled
		// and the loop has run long enough
		if (rt.inst_count() && i1 > 50)
			break;

		// retry with the register map restored to the pre-group state
		regmap = prev_regmap;

	} while (1);

	PSC_DUMP(
		sblog << " prepare_alu_group done, " << rt.inst_count()
			<< " slot(s) \n";

		sblog << "$$$$$$$$PAG i1=" << i1
			<< " ready " << ready.count()
			<< " pending " << pending.count()
			<< " conflicting " << alu.conflict_nodes.count()
			<<"\n";

	);

	return rt.inst_count();
}
1685
// Release every value consumed by n: its sources, plus the address values
// reached through relative operands in dst (dst entries themselves are
// not uses - see release_src_vec with src == false).
// Note: the order of the two calls affects the order in which defs become
// ready, so it must not be changed.
void post_scheduler::release_src_values(node* n) {
	release_src_vec(n->src, true);
	release_src_vec(n->dst, false);
}
1690
1691 void post_scheduler::release_op(node *n) {
1692 PSC_DUMP(
1693 sblog << "release_op ";
1694 dump::dump_op(n);
1695 sblog << "\n";
1696 );
1697
1698 n->remove();
1699
1700 if (n->is_copy_mov()) {
1701 ready_copies.push_back(n);
1702 } else if (n->is_mova() || n->is_pred_set()) {
1703 ready.push_front(n);
1704 } else {
1705 ready.push_back(n);
1706 }
1707 }
1708
1709 void post_scheduler::release_src_val(value *v) {
1710 node *d = v->any_def();
1711 if (d) {
1712 if (!--ucm[d])
1713 release_op(d);
1714 }
1715 }
1716
1717 void post_scheduler::release_src_vec(vvec& vv, bool src) {
1718
1719 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
1720 value *v = *I;
1721 if (!v || v->is_readonly())
1722 continue;
1723
1724 if (v->is_rel()) {
1725 release_src_val(v->rel);
1726 release_src_vec(v->muse, true);
1727
1728 } else if (src) {
1729 release_src_val(v);
1730 }
1731 }
1732 }
1733
1734 void literal_tracker::reset() {
1735 memset(lt, 0, sizeof(lt));
1736 memset(uc, 0, sizeof(uc));
1737 }
1738
1739 void rp_gpr_tracker::reset() {
1740 memset(rp, 0, sizeof(rp));
1741 memset(uc, 0, sizeof(uc));
1742 }
1743
1744 void rp_kcache_tracker::reset() {
1745 memset(rp, 0, sizeof(rp));
1746 memset(uc, 0, sizeof(uc));
1747 }
1748
1749 void alu_kcache_tracker::reset() {
1750 memset(kc, 0, sizeof(kc));
1751 lines.clear();
1752 }
1753
1754 void alu_clause_tracker::reset() {
1755 group = 0;
1756 slot_count = 0;
1757 grp0.reset();
1758 grp1.reset();
1759 }
1760
// Construct the clause tracker: the kcache tracker is set up for the
// shader's hw class; both group trackers (current/previous, selected by
// 'group') are created; slot count, clause pointer, exec-mask-push flag
// and the live AR/PR values start out zeroed.
alu_clause_tracker::alu_clause_tracker(shader &sh)
	: sh(sh), kt(sh.get_ctx().hw_class), slot_count(),
	  grp0(sh), grp1(sh),
	  group(), clause(),
	  push_exec_mask(),
	  current_ar(), current_pr() {}
1767
// Emit the current (non-empty) group as an alu_group_node and prepend it
// to the clause (push_front - groups are produced in reverse order), then
// switch to a fresh group tracker.
void alu_clause_tracker::emit_group() {

	assert(grp().inst_count());

	alu_group_node *g = grp().emit();

	if (grp().has_update_exec_mask()) {
		// only one exec-mask update is allowed per clause
		assert(!push_exec_mask);
		push_exec_mask = true;
	}

	assert(g);

	// lazily create the clause node on the first emitted group
	if (!clause) {
		clause = sh.create_clause(NST_ALU_CLAUSE);
	}

	clause->push_front(g);

	slot_count += grp().slot_count();

	new_group();

	PSC_DUMP( sblog << " #### group emitted\n"; );
}
1793
// Finalize the current clause: apply the kcache settings to its bytecode,
// switch the CF op to ALU_PUSH_BEFORE if the clause updates the exec
// mask, prepend the clause to container c, and reset clause-level state.
void alu_clause_tracker::emit_clause(container_node *c) {
	assert(clause);

	kt.init_clause(clause->bc);

	// AR/PR values must have been consumed before the clause boundary
	assert(!current_ar);
	assert(!current_pr);

	if (push_exec_mask)
		clause->bc.set_op(CF_OP_ALU_PUSH_BEFORE);

	c->push_front(clause);

	clause = NULL;
	push_exec_mask = false;
	slot_count = 0;
	kt.reset();

	PSC_DUMP( sblog << "######### ALU clause emitted\n"; );
}
1814
1815 bool alu_clause_tracker::check_clause_limits() {
1816
1817 alu_group_tracker &gt = grp();
1818
1819 unsigned slots = gt.slot_count();
1820
1821 // reserving slots to load AR and PR values
1822 unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 1 : 0);
1823
1824 if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots)
1825 return false;
1826
1827 if (!kt.try_reserve(gt))
1828 return false;
1829
1830 return true;
1831 }
1832
1833 void alu_clause_tracker::new_group() {
1834 group = !group;
1835 grp().reset();
1836 }
1837
1838 bool alu_clause_tracker::is_empty() {
1839 return clause == NULL;
1840 }
1841
1842 void literal_tracker::init_group_literals(alu_group_node* g) {
1843
1844 g->literals.clear();
1845 for (unsigned i = 0; i < 4; ++i) {
1846 if (!lt[i])
1847 break;
1848
1849 g->literals.push_back(lt[i]);
1850
1851 PSC_DUMP(
1852 sblog << "literal emitted: " << lt[i].f;
1853 sblog.print_zw_hex(lt[i].u, 8);
1854 sblog << " " << lt[i].i << "\n";
1855 );
1856 }
1857 }
1858
// Try to reserve the kcache lines used by the group's constants within
// the current clause. Succeeds trivially if the group uses no constants
// or no new lines are needed; otherwise the kcache set configuration is
// rebuilt (update_kc). On failure the accumulated line set is rolled
// back and false is returned.
bool alu_kcache_tracker::try_reserve(alu_group_tracker& gt) {
	rp_kcache_tracker &kt = gt.kcache();

	if (!kt.num_sels())
		return true;

	sb_set<unsigned> group_lines;

	unsigned nl = kt.get_lines(group_lines);
	assert(nl);

	// remember the old line set so we can roll back on failure
	sb_set<unsigned> clause_lines(lines);
	lines.add_set(group_lines);

	// no new lines were added - existing kcache sets already cover them
	if (clause_lines.size() == lines.size())
		return true;

	if (update_kc())
		return true;

	// rollback: the new lines don't fit into the available kcache sets
	lines = clause_lines;

	return false;
}
1883
// Translate the reserved kcache sels into line numbers and insert them
// into 'lines'. Entries are filled from index 0, so the first zero entry
// terminates the scan. Returns the number of newly inserted lines.
unsigned rp_kcache_tracker::get_lines(kc_lines& lines) {
	unsigned cnt = 0;

	for (unsigned i = 0; i < sel_count; ++i) {
		unsigned line = rp[i];

		if (!line)
			return cnt;

		// entries are stored biased by 1 so that 0 means "unused"
		--line;
		// shift amount depends on tracking granularity (sel_count == 2
		// tracks const pairs) - presumably maps sels to kcache line
		// indices with the bank in the upper bits; see kc_sel/try_reserve
		line = (sel_count == 2) ? line >> 5 : line >> 6;

		if (lines.insert(line).second)
			++cnt;
	}
	return cnt;
}
1901
// Rebuild the kcache set configuration (kc[]) from the accumulated line
// set. A line that directly follows the previous set's line in the same
// bank extends that set (wider lock mode) instead of starting a new one.
// Returns false - restoring the previous configuration - when more than
// max_kcs sets would be required.
bool alu_kcache_tracker::update_kc() {
	unsigned c = 0;

	// keep a copy so we can restore on failure
	bc_kcache old_kc[4];
	memcpy(old_kc, kc, sizeof(kc));

	for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; ++I) {
		unsigned line = *I;
		unsigned bank = line >> 8;

		line &= 0xFF;

		// merge an adjacent line into the previous set
		if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line))
			++kc[c-1].mode;
		else {
			if (c == max_kcs) {
				memcpy(kc, old_kc, sizeof(kc));
				return false;
			}

			kc[c].mode = KC_LOCK_1;

			kc[c].bank = bank;
			kc[c].addr = line;
			++c;
		}
	}
	return true;
}
1931
// Create the MOVA instruction that loads the current AR value: either
// MOVA_GPR_INT in the trans slot or MOVA_INT in slot X, depending on the
// target's uses_mova_gpr setting.
alu_node* alu_clause_tracker::create_ar_load() {
	alu_node *a = sh.create_alu();

	// FIXME use MOVA_GPR on R6xx

	if (sh.get_ctx().uses_mova_gpr) {
		a->bc.set_op(ALU_OP1_MOVA_GPR_INT);
		a->bc.slot = SLOT_TRANS;
	} else {
		a->bc.set_op(ALU_OP1_MOVA_INT);
		a->bc.slot = SLOT_X;
	}

	// dst[0] stays empty (AR is a special register, not a gpr)
	a->dst.resize(1);
	a->src.push_back(current_ar);

	PSC_DUMP(
		sblog << "created AR load: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	return a;
}
1956
// Discard every instruction of the current group, moving them to the
// conflict list so the scheduler can retry them later.
void alu_clause_tracker::discard_current_group() {
	PSC_DUMP( sblog << "act::discard_current_group\n"; );
	grp().discard_all_slots(conflict_nodes);
}
1961
1962 void rp_gpr_tracker::dump() {
1963 sblog << "=== gpr_tracker dump:\n";
1964 for (int c = 0; c < 3; ++c) {
1965 sblog << "cycle " << c << " ";
1966 for (int h = 0; h < 4; ++h) {
1967 sblog << rp[c][h] << ":" << uc[c][h] << " ";
1968 }
1969 sblog << "\n";
1970 }
1971 }
1972
1973 } // namespace r600_sb