r600g/sb: fix kcache handling on r6xx
[mesa.git] / src / gallium / drivers / r600 / sb / sb_sched.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define PSC_DEBUG 0
28
29 #if PSC_DEBUG
30 #define PSC_DUMP(a) do { a } while (0)
31 #else
32 #define PSC_DUMP(a)
33 #endif
34
35 #include "sb_bc.h"
36 #include "sb_shader.h"
37
38 #include "sb_pass.h"
39 #include "sb_sched.h"
40
41 namespace r600_sb {
42
43 using std::cerr;
44
// Tracks kcache (constant cache) line reservations for one ALU group.
// sel_count is the number of distinct kcache line pairs available.
rp_kcache_tracker::rp_kcache_tracker(shader &sh) : rp(), uc(),
		// FIXME: for now we'll use "two const pairs" limit for r600, same as
		// for other chips, otherwise additional check in alu_group_tracker is
		// required to make sure that all 4 consts in the group fit into 2
		// kcache sets
		sel_count(2) {}
51
52 bool rp_kcache_tracker::try_reserve(sel_chan r) {
53 unsigned sel = kc_sel(r);
54
55 for (unsigned i = 0; i < sel_count; ++i) {
56 if (rp[i] == 0) {
57 rp[i] = sel;
58 ++uc[i];
59 return true;
60 }
61 if (rp[i] == sel) {
62 ++uc[i];
63 return true;
64 }
65 }
66 return false;
67 }
68
// Reserve kcache lines for all kcache sources of n. On partial failure
// roll back the reservations already made and return false, leaving the
// tracker state unchanged.
bool rp_kcache_tracker::try_reserve(node* n) {
	bool need_unreserve = false;
	vvec::iterator I(n->src.begin()), E(n->src.end());

	// forward pass: reserve each kcache constant in turn
	for (; I != E; ++I) {
		value *v = *I;
		if (v->is_kcache()) {
			if (!try_reserve(v->select))
				break;
			else
				need_unreserve = true;
		}
	}
	if (I == E)
		return true;	// all kcache sources reserved

	// backward pass: undo the reservations made before the failure
	if (need_unreserve && I != n->src.begin()) {
		do {
			--I;
			value *v =*I;
			if (v->is_kcache())
				unreserve(v->select);
		} while (I != n->src.begin());
	}
	return false;
}
95
96 inline
97 void rp_kcache_tracker::unreserve(node* n) {
98 vvec::iterator I(n->src.begin()), E(n->src.end());
99 for (; I != E; ++I) {
100 value *v = *I;
101 if (v->is_kcache())
102 unreserve(v->select);
103 }
104 }
105
106 void rp_kcache_tracker::unreserve(sel_chan r) {
107 unsigned sel = kc_sel(r);
108
109 for (unsigned i = 0; i < sel_count; ++i)
110 if (rp[i] == sel) {
111 if (--uc[i] == 0)
112 rp[i] = 0;
113 return;
114 }
115 assert(0);
116 return;
117 }
118
// Reserve literal slots for all literal sources of n; on partial
// failure roll back the reservations already made and return false.
bool literal_tracker::try_reserve(alu_node* n) {
	bool need_unreserve = false;

	vvec::iterator I(n->src.begin()), E(n->src.end());

	// forward pass: reserve each literal in turn
	for (; I != E; ++I) {
		value *v = *I;
		if (v->is_literal()) {
			if (!try_reserve(v->literal_value))
				break;
			else
				need_unreserve = true;
		}
	}
	if (I == E)
		return true;	// all literals reserved

	// backward pass: undo the reservations made before the failure
	if (need_unreserve && I != n->src.begin()) {
		do {
			--I;
			value *v =*I;
			if (v->is_literal())
				unreserve(v->literal_value);
		} while (I != n->src.begin());
	}
	return false;
}
146
147 void literal_tracker::unreserve(alu_node* n) {
148 unsigned nsrc = n->bc.op_ptr->src_count, i;
149
150 for (i = 0; i < nsrc; ++i) {
151 value *v = n->src[i];
152 if (v->is_literal())
153 unreserve(v->literal_value);
154 }
155 }
156
// Try to allocate (or share) a literal slot for l. At most
// MAX_ALU_LITERALS distinct literal values fit into one ALU group.
bool literal_tracker::try_reserve(literal l) {

	PSC_DUMP( cerr << "literal reserve " << l.u << " " << l.f << "\n"; );

	for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
		if (lt[i] == 0) {
			// free slot - claim it for this literal
			lt[i] = l;
			++uc[i];
			PSC_DUMP( cerr << "  reserved new uc = " << uc[i] << "\n"; );
			return true;
		} else if (lt[i] == l) {
			// identical literal already present - share the slot
			++uc[i];
			PSC_DUMP( cerr << "  reserved uc = " << uc[i] << "\n"; );
			return true;
		}
	}
	PSC_DUMP( cerr << "  failed to reserve literal\n"; );
	return false;
}
176
177 void literal_tracker::unreserve(literal l) {
178
179 PSC_DUMP( cerr << "literal unreserve " << l.u << " " << l.f << "\n"; );
180
181 for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
182 if (lt[i] == l) {
183 if (--uc[i] == 0)
184 lt[i] = 0;
185 return;
186 }
187 }
188 assert(0);
189 return;
190 }
191
192 static inline unsigned bs_cycle_vector(unsigned bs, unsigned src) {
193 static const unsigned swz[VEC_NUM][3] = {
194 {0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0}
195 };
196 assert(bs < VEC_NUM && src < 3);
197 return swz[bs][src];
198 }
199
200 static inline unsigned bs_cycle_scalar(unsigned bs, unsigned src) {
201 static const unsigned swz[SCL_NUM][3] = {
202 {2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1}
203 };
204
205 if (bs >= SCL_NUM || src >= 3) {
206 // this prevents gcc warning "array subscript is above array bounds"
207 // AFAICS we should never hit this path
208 abort();
209 }
210 return swz[bs][src];
211 }
212
213 static inline unsigned bs_cycle(bool trans, unsigned bs, unsigned src) {
214 return trans ? bs_cycle_scalar(bs, src) : bs_cycle_vector(bs, src);
215 }
216
217 inline
218 bool rp_gpr_tracker::try_reserve(unsigned cycle, unsigned sel, unsigned chan) {
219 ++sel;
220 if (rp[cycle][chan] == 0) {
221 rp[cycle][chan] = sel;
222 ++uc[cycle][chan];
223 return true;
224 } else if (rp[cycle][chan] == sel) {
225 ++uc[cycle][chan];
226 return true;
227 }
228 return false;
229 }
230
// Release the read-port reservations made by try_reserve(n). Mirrors
// its logic: readonly (const/literal) sources never reserved a port,
// and when the 'opt' case folded identical src0/src1 into a single
// reservation the second source is skipped here too.
inline
void rp_gpr_tracker::unreserve(alu_node* n) {
	unsigned nsrc = n->bc.op_ptr->src_count, i;
	unsigned trans = n->bc.slot == SLOT_TRANS;
	unsigned bs = n->bc.bank_swizzle;
	// NOTE(review): try_reserve() compares the value pointers
	// (n->src[0] == n->src[1]); here bc.src sel/chan are compared
	// instead — presumably equivalent after a successful reservation
	// assigned them, but verify.
	unsigned opt = !trans
			&& n->bc.src[0].sel == n->bc.src[1].sel
			&& n->bc.src[0].chan == n->bc.src[1].chan;

	for (i = 0; i < nsrc; ++i) {
		value *v = n->src[i];
		if (v->is_readonly())
			continue;
		if (i == 1 && opt)
			continue;
		unsigned cycle = bs_cycle(trans, bs, i);
		unreserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan);
	}
}
250
251 inline
252 void rp_gpr_tracker::unreserve(unsigned cycle, unsigned sel, unsigned chan) {
253 ++sel;
254 assert(rp[cycle][chan] == sel && uc[cycle][chan]);
255 if (--uc[cycle][chan] == 0)
256 rp[cycle][chan] = 0;
257 }
258
// Reserve GPR read ports for all non-const sources of n under its
// current bank swizzle. Also enforces the trans-slot constraint that
// const reads occupy the early cycles: a GPR read cannot use a cycle
// earlier than the number of const operands seen so far, and at the
// end min_gpr_cycle + 1 must exceed const_count. Rolls back on failure.
inline
bool rp_gpr_tracker::try_reserve(alu_node* n) {
	unsigned nsrc = n->bc.op_ptr->src_count, i;
	unsigned trans = n->bc.slot == SLOT_TRANS;
	unsigned bs = n->bc.bank_swizzle;
	// identical src0/src1 share one read port (vector slots only)
	unsigned opt = !trans && nsrc >= 2 &&
			n->src[0] == n->src[1];

	bool need_unreserve = false;
	unsigned const_count = 0, min_gpr_cycle = 3;

	for (i = 0; i < nsrc; ++i) {
		value *v = n->src[i];
		if (v->is_readonly()) {
			const_count++;
			// the trans slot can read at most 2 const operands
			if (trans && const_count == 3)
				break;
		} else {
			if (i == 1 && opt)
				continue;

			unsigned cycle = bs_cycle(trans, bs, i);

			if (trans && cycle < min_gpr_cycle)
				min_gpr_cycle = cycle;

			// trans slot: const reads block the earlier cycles
			if (const_count && cycle < const_count && trans)
				break;

			if (!try_reserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan))
				break;
			else
				need_unreserve = true;
		}
	}

	if ((i == nsrc) && (min_gpr_cycle + 1 > const_count))
		return true;

	// partial failure: roll back what was reserved.
	// NB: 'continue' inside this do-while jumps to the 'while (i--)'
	// condition, so the decrement still happens for skipped sources.
	if (need_unreserve && i--) {
		do {
			value *v = n->src[i];
			if (!v->is_readonly()) {
				if (i == 1 && opt)
					continue;
				unreserve(bs_cycle(trans, bs, i), n->bc.src[i].sel,
						n->bc.src[i].chan);
			}
		} while (i--);
	}
	return false;
}
311
// Per-group scheduling state for the post-RA scheduler. Cayman has 4
// ALU slots (no trans unit), other chips have 4 vector slots + trans;
// available_slots is the bitmask of slots the target actually has.
alu_group_tracker::alu_group_tracker(shader &sh)
	: sh(sh), kc(sh),
	  gpr(), lt(), slots(),
	  max_slots(sh.get_ctx().is_cayman() ? 4 : 5),
	  has_mova(), uses_ar(), has_predset(), has_kill(),
	  updates_exec_mask(), chan_count(), interp_param(), next_id() {

	available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
}
321
322 inline
323 sel_chan alu_group_tracker::get_value_id(value* v) {
324 unsigned &id = vmap[v];
325 if (!id)
326 id = ++next_id;
327 return sel_chan(id, v->get_final_chan());
328 }
329
330 inline
331 void alu_group_tracker::assign_slot(unsigned slot, alu_node* n) {
332 update_flags(n);
333 slots[slot] = n;
334 available_slots &= ~(1 << slot);
335
336 unsigned param = n->interp_param();
337
338 if (param) {
339 assert(!interp_param || interp_param == param);
340 interp_param = param;
341 }
342 }
343
344
345 void alu_group_tracker::discard_all_slots(container_node &removed_nodes) {
346 PSC_DUMP( cerr << "agt::discard_all_slots\n"; );
347 discard_slots(~available_slots & ((1 << max_slots) - 1), removed_nodes);
348 }
349
// Remove the instructions in slot_mask from the current group, pushing
// them onto removed_nodes so the caller can reschedule them later.
// Packed (multi-slot) ops are discarded whole if any of their slots is
// in the mask; afterwards a trans-slot op that can also run in a vector
// slot is moved there if its dst channel's slot became free, and the
// group state is rebuilt via reinit().
void alu_group_tracker::discard_slots(unsigned slot_mask,
                                      container_node &removed_nodes) {

	PSC_DUMP(
		cerr << "discard_slots : packed_ops : " << packed_ops.size() << "\n";
	);

	// first pass: packed ops — discard the whole op if it overlaps the mask
	for (node_vec::iterator N, I = packed_ops.begin();
			I != packed_ops.end(); I = N) {
		N = I; ++N;

		alu_packed_node *n = static_cast<alu_packed_node*>(*I);
		unsigned pslots = n->get_slot_mask();

		PSC_DUMP(
			cerr << "discard_slots : packed slot_mask : " << pslots << "\n";
		);

		if (pslots & slot_mask) {

			PSC_DUMP(
				cerr << "discard_slots : discarding packed...\n";
			);

			removed_nodes.push_back(n);
			slot_mask &= ~pslots;	// these slots are handled here
			N = packed_ops.erase(I);
			available_slots |= pslots;
			for (unsigned k = 0; k < max_slots; ++k) {
				if (pslots & (1 << k))
					slots[k] = NULL;
			}
		}
	}

	// second pass: remaining single-slot instructions
	for (unsigned slot = 0; slot < max_slots; ++slot) {
		unsigned slot_bit = 1 << slot;

		if (slot_mask & slot_bit) {
			assert(!(available_slots & slot_bit));
			assert(slots[slot]);

			// 4-slot-only ops must have been handled as packed above
			assert(!(slots[slot]->bc.slot_flags & AF_4SLOT));

			PSC_DUMP(
				cerr << "discarding slot " << slot << " : ";
				dump::dump_op(slots[slot]);
				cerr << "\n";
			);

			removed_nodes.push_back(slots[slot]);
			slots[slot] = NULL;
			available_slots |= slot_bit;
		}
	}

	// if the trans slot holds a vector-capable op and its dst channel's
	// vector slot is now free, move it there
	alu_node *t = slots[4];
	if (t && (t->bc.slot_flags & AF_V)) {
		unsigned chan = t->bc.dst_chan;
		if (!slots[chan]) {
			PSC_DUMP(
				cerr << "moving ";
				dump::dump_op(t);
				cerr << " from trans slot to free slot " << chan << "\n";
			);

			slots[chan] = t;
			slots[4] = NULL;
			t->bc.slot = chan;
		}
	}

	reinit();
}
424
425 alu_group_node* alu_group_tracker::emit() {
426
427 alu_group_node *g = sh.create_alu_group();
428
429 lt.init_group_literals(g);
430
431 for (unsigned i = 0; i < max_slots; ++i) {
432 alu_node *n = slots[i];
433 if (n) {
434 g->push_back(n);
435 }
436 }
437 return g;
438 }
439
440 bool alu_group_tracker::try_reserve(alu_node* n) {
441 unsigned nsrc = n->bc.op_ptr->src_count;
442 unsigned slot = n->bc.slot;
443 bool trans = slot == 4;
444
445 if (slots[slot])
446 return false;
447
448 unsigned flags = n->bc.op_ptr->flags;
449
450 unsigned param = n->interp_param();
451
452 if (param && interp_param && interp_param != param)
453 return false;
454
455 if ((flags & AF_KILL) && has_predset)
456 return false;
457 if ((flags & AF_ANY_PRED) && (has_kill || has_predset))
458 return false;
459 if ((flags & AF_MOVA) && (has_mova || uses_ar))
460 return false;
461
462 if (n->uses_ar() && has_mova)
463 return false;
464
465 for (unsigned i = 0; i < nsrc; ++i) {
466
467 unsigned last_id = next_id;
468
469 value *v = n->src[i];
470 if (!v->is_any_gpr() && !v->is_rel())
471 continue;
472 sel_chan vid = get_value_id(n->src[i]);
473
474 if (vid > last_id && chan_count[vid.chan()] == 3) {
475 return false;
476 }
477
478 n->bc.src[i].sel = vid.sel();
479 n->bc.src[i].chan = vid.chan();
480 }
481
482 if (!lt.try_reserve(n))
483 return false;
484
485 if (!kc.try_reserve(n)) {
486 lt.unreserve(n);
487 return false;
488 }
489
490 unsigned fbs = n->forced_bank_swizzle();
491
492 n->bc.bank_swizzle = 0;
493
494 if (!trans & fbs)
495 n->bc.bank_swizzle = VEC_210;
496
497 if (gpr.try_reserve(n)) {
498 assign_slot(slot, n);
499 return true;
500 }
501
502 if (!fbs) {
503 unsigned swz_num = trans ? SCL_NUM : VEC_NUM;
504 for (unsigned bs = 0; bs < swz_num; ++bs) {
505 n->bc.bank_swizzle = bs;
506 if (gpr.try_reserve(n)) {
507 assign_slot(slot, n);
508 return true;
509 }
510 }
511 }
512
513 gpr.reset();
514
515 slots[slot] = n;
516 unsigned forced_swz_slots = 0;
517 int first_slot = ~0, first_nf = ~0, last_slot = ~0;
518 unsigned save_bs[5];
519
520 for (unsigned i = 0; i < max_slots; ++i) {
521 alu_node *a = slots[i];
522 if (a) {
523 if (first_slot == ~0)
524 first_slot = i;
525 last_slot = i;
526 save_bs[i] = a->bc.bank_swizzle;
527 if (a->forced_bank_swizzle()) {
528 assert(i != SLOT_TRANS);
529 forced_swz_slots |= (1 << i);
530 a->bc.bank_swizzle = VEC_210;
531 if (!gpr.try_reserve(a))
532 assert("!internal reservation error");
533 } else {
534 if (first_nf == ~0)
535 first_nf = i;
536
537 a->bc.bank_swizzle = 0;
538 }
539 }
540 }
541
542 if (first_nf == ~0) {
543 assign_slot(slot, n);
544 return true;
545 }
546
547 assert(first_slot != ~0 && last_slot != ~0);
548
549 // silence "array subscript is above array bounds" with gcc 4.8
550 if (last_slot >= 5)
551 abort();
552
553 int i = first_nf;
554 alu_node *a = slots[i];
555 bool backtrack = false;
556
557 while (1) {
558
559 PSC_DUMP(
560 cerr << " bs: trying s" << i << " bs:" << a->bc.bank_swizzle
561 << " bt:" << backtrack << "\n";
562 );
563
564 if (!backtrack && gpr.try_reserve(a)) {
565 PSC_DUMP(
566 cerr << " bs: reserved s" << i << " bs:" << a->bc.bank_swizzle
567 << "\n";
568 );
569
570 while ((++i <= last_slot) && !slots[i]);
571 if (i <= last_slot)
572 a = slots[i];
573 else
574 break;
575 } else {
576 bool itrans = i == SLOT_TRANS;
577 unsigned max_swz = itrans ? SCL_221 : VEC_210;
578
579 if (a->bc.bank_swizzle < max_swz) {
580 ++a->bc.bank_swizzle;
581
582 PSC_DUMP(
583 cerr << " bs: inc s" << i << " bs:" << a->bc.bank_swizzle
584 << "\n";
585 );
586
587 } else {
588
589 a->bc.bank_swizzle = 0;
590 while ((--i >= first_nf) && !slots[i]);
591 if (i < first_nf)
592 break;
593 a = slots[i];
594 PSC_DUMP(
595 cerr << " bs: unreserve s" << i << " bs:" << a->bc.bank_swizzle
596 << "\n";
597 );
598 gpr.unreserve(a);
599 backtrack = true;
600
601 continue;
602 }
603 }
604 backtrack = false;
605 }
606
607 if (i == last_slot + 1) {
608 assign_slot(slot, n);
609 return true;
610 }
611
612 // reservation failed, restore previous state
613 slots[slot] = NULL;
614 gpr.reset();
615 for (unsigned i = 0; i < max_slots; ++i) {
616 alu_node *a = slots[i];
617 if (a) {
618 a->bc.bank_swizzle = save_bs[i];
619 bool b = gpr.try_reserve(a);
620 assert(b);
621 }
622 }
623
624 kc.unreserve(n);
625 lt.unreserve(n);
626 return false;
627 }
628
// Try to reserve all slots of a packed (multi-slot) op atomically.
// On failure, clear the slots taken so far and rebuild the remaining
// group state with reinit().
bool alu_group_tracker::try_reserve(alu_packed_node* p) {
	bool need_unreserve = false;
	node_iterator I(p->begin()), E(p->end());

	for (; I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		if (!try_reserve(n))
			break;
		else
			need_unreserve = true;
	}

	if (I == E) {
		// every sub-op reserved - remember the packed op for discard logic
		packed_ops.push_back(p);
		return true;
	}

	if (need_unreserve) {
		// walk back over the successfully reserved sub-ops and free
		// their slots; reinit() rebuilds the tracker from what remains
		while (--I != E) {
			alu_node *n = static_cast<alu_node*>(*I);
			slots[n->bc.slot] = NULL;
		}
		reinit();
	}
	return false;
}
655
// Rebuild all tracker state (kcache, literals, read ports, flags) from
// the current slots[] contents; used after slots were cleared by a
// discard or a packed-op rollback. Re-reserving ops that were already
// in the group must succeed - anything else is an internal error.
void alu_group_tracker::reinit() {
	alu_node * s[5];
	memcpy(s, slots, sizeof(slots));

	reset(true);	// keep packed_ops - they are still in the group

	// reinsert in reverse slot order
	for (int i = max_slots - 1; i >= 0; --i) {
		if (s[i] && !try_reserve(s[i])) {
			cerr << "alu_group_tracker: reinit error on slot " << i << "\n";
			for (unsigned i = 0; i < max_slots; ++i) {
				cerr << "  slot " << i << " : ";
				if (s[i])
					dump::dump_op(s[i]);

				cerr << "\n";
			}
			assert(!"alu_group_tracker: reinit error");
		}
	}
}
676
677 void alu_group_tracker::reset(bool keep_packed) {
678 kc.reset();
679 gpr.reset();
680 lt.reset();
681 memset(slots, 0, sizeof(slots));
682 vmap.clear();
683 next_id = 0;
684 has_mova = false;
685 uses_ar = false;
686 has_predset = false;
687 has_kill = false;
688 updates_exec_mask = false;
689 available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
690 interp_param = 0;
691
692 chan_count[0] = 0;
693 chan_count[1] = 0;
694 chan_count[2] = 0;
695 chan_count[3] = 0;
696
697 if (!keep_packed)
698 packed_ops.clear();
699 }
700
701 void alu_group_tracker::update_flags(alu_node* n) {
702 unsigned flags = n->bc.op_ptr->flags;
703 has_kill |= (flags & AF_KILL);
704 has_mova |= (flags & AF_MOVA);
705 has_predset |= (flags & AF_ANY_PRED);
706 uses_ar |= n->uses_ar();
707
708 if (flags & AF_ANY_PRED) {
709 if (n->dst[2] != NULL)
710 updates_exec_mask = true;
711 }
712 }
713
// Pass entry point: schedule every basic block in the shader.
int post_scheduler::run() {
	run_on(sh.root);
	return 0;
}
718
719 void post_scheduler::run_on(container_node* n) {
720
721 for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
722 if (I->is_container()) {
723 if (I->subtype == NST_BB) {
724 bb_node* bb = static_cast<bb_node*>(*I);
725 schedule_bb(bb);
726 } else {
727 run_on(static_cast<container_node*>(*I));
728 }
729 }
730 }
731 }
732
733 void post_scheduler::init_uc_val(container_node *c, value *v) {
734 node *d = v->any_def();
735 if (d && d->parent == c)
736 ++ucm[d];
737 }
738
// Count intra-clause uses for each value in vv: values defined inside
// clause c bump their defining node's use count. Rel (indirect) access
// also counts the address value and recurses into its muse vector.
void post_scheduler::init_uc_vec(container_node *c, vvec &vv, bool src) {
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v || v->is_readonly())
			continue;

		if (v->is_rel()) {
			init_uc_val(c, v->rel);
			init_uc_vec(c, v->muse, true);
		// NOTE(review): no 'else' here — for a rel value that is also a
		// source both branches run; looks deliberate, but confirm it is
		// not a missing 'else'
		} if (src) {
			init_uc_val(c, v);
		}
	}
}
753
754 unsigned post_scheduler::init_ucm(container_node *c, node *n) {
755 init_uc_vec(c, n->src, true);
756 init_uc_vec(c, n->dst, false);
757
758 uc_map::iterator F = ucm.find(n);
759 return F == ucm.end() ? 0 : F->second;
760 }
761
// Schedule one basic block: ALU clauses go through process_alu(), all
// other instructions are moved back unchanged. The block is consumed
// from the back and rebuilt with push_front, preserving order.
void post_scheduler::schedule_bb(bb_node* bb) {
	PSC_DUMP(
		cerr << "scheduling BB " << bb->id << "\n";
		if (!pending.empty())
			dump::dump_op_list(&pending);
	);

	assert(pending.empty());
	assert(bb_pending.empty());
	assert(ready.empty());

	bb_pending.append_from(bb);
	cur_bb = bb;

	node *n;

	while ((n = bb_pending.back())) {

		PSC_DUMP(
			cerr << "post_sched_bb ";
			dump::dump_op(n);
			cerr << "\n";
		);

		if (n->subtype == NST_ALU_CLAUSE) {
			n->remove();
			process_alu(static_cast<container_node*>(n));
			continue;
		}

		n->remove();
		bb->push_front(n);
	}

	this->cur_bb = NULL;
}
798
// Build the initial register map from the live set: every live,
// preallocated sgpr value claims its assigned register.
void post_scheduler::init_regmap() {

	regmap.clear();

	PSC_DUMP(
		cerr << "init_regmap: live: ";
		dump::dump_set(sh, live);
		cerr << "\n";
	);

	for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
		value *v = *I;
		assert(v);
		// only preallocated sgprs have a fixed register to record
		if (!v->is_sgpr() || !v->is_prealloc())
			continue;

		sel_chan r = v->gpr;

		PSC_DUMP(
			cerr << "init_regmap:  " << r << " <= ";
			dump::dump_val(v);
			cerr << "\n";
		);

		assert(r);
		regmap[r] = v;
	}
}
827
// Prepare and schedule one ALU clause (bottom-up). Instructions whose
// results are still used later inside the clause (use count != 0) are
// held in 'pending'; the rest are released to the ready queue at once.
void post_scheduler::process_alu(container_node *c) {

	ucm.clear();
	alu.reset();

	live = c->live_after;

	init_globals(c->live_after, true);
	init_globals(c->live_before, true);

	init_regmap();

	update_local_interferences();

	// reverse walk; N saved first because release_op may unlink *I
	for (node_riterator N, I = c->rbegin(), E = c->rend(); I != E; I = N) {
		N = I;
		++N;

		node *n = *I;
		unsigned uc = init_ucm(c, n);

		PSC_DUMP(
			cerr << "process_alu uc=" << uc << "   ";
			dump::dump_op(n);
			cerr << "  ";
		);

		if (uc) {
			n->remove();
			pending.push_back(n);
			PSC_DUMP( cerr << "pending\n"; );
		} else {
			release_op(n);
		}
	}

	schedule_alu(c);
}
866
867 void post_scheduler::update_local_interferences() {
868
869 PSC_DUMP(
870 cerr << "update_local_interferences : ";
871 dump::dump_set(sh, live);
872 cerr << "\n";
873 );
874
875
876 for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
877 value *v = *I;
878 if (v->is_prealloc())
879 continue;
880
881 v->interferences.add_set(live);
882 }
883 }
884
// Add the source values of an instruction to the live set (bottom-up
// scan, so sources become live at their use). Newly-live local values
// get their stale interference data cleared once per clause; 'born'
// collects the values that became live at this group.
void post_scheduler::update_live_src_vec(vvec &vv, val_set *born, bool src) {
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;

		if (!v)
			continue;

		if (src && v->is_any_gpr()) {
			if (live.add_val(v)) {
				if (!v->is_prealloc()) {
					if (!cleared_interf.contains(v)) {
						PSC_DUMP(
							cerr << "clearing interferences for " << *v << "\n";
						);
						v->interferences.clear();
						cleared_interf.add_val(v);
					}
				}
				if (born)
					born->add_val(v);
			}
		} else if (v->is_rel()) {
			// NOTE(review): the rel address is added to live only when it
			// is NOT a gpr — looks inverted at first glance; presumably
			// gpr addresses are covered via muse below — verify
			if (!v->rel->is_any_gpr())
				live.add_val(v->rel);
			update_live_src_vec(v->muse, born, true);
		}
	}
}
913
// Remove defined (dst) values from the live set; for rel (indirect)
// stores recurse into the set of possibly-defined values (mdef).
void post_scheduler::update_live_dst_vec(vvec &vv) {
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (v->is_rel()) {
			update_live_dst_vec(v->mdef);
		} else if (v->is_any_gpr()) {
			// failure to remove is tolerated (value may be dead already)
			if (!live.remove_val(v)) {
				PSC_DUMP(
					cerr << "failed to remove ";
					dump::dump_val(v);
					cerr << " from live : ";
					dump::dump_set(sh, live);
					cerr << "\n";
				);
			}
		}
	}
}
935
// Update liveness across instruction n (bottom-up): kill defs first,
// then add uses. dst is scanned as a source vector too, to pick up the
// address operands of rel (indirect) destinations.
void post_scheduler::update_live(node *n, val_set *born) {
	update_live_dst_vec(n->dst);
	update_live_src_vec(n->src, born, true);
	update_live_src_vec(n->dst, born, false);
}
941
// Finalize the current ALU group: recolor local destinations, update
// liveness across all slots, record interferences, then release the
// source values so their producers can become ready.
void post_scheduler::process_group() {
	alu_group_tracker &rt = alu.grp();

	val_set vals_born;

	recolor_locals();

	PSC_DUMP(
		cerr << "process_group: live_before : ";
		dump::dump_set(sh, live);
		cerr << "\n";
	);

	for (unsigned s = 0; s < ctx.num_slots; ++s) {
		alu_node *n = rt.slot(s);
		if (!n)
			continue;

		update_live(n, &vals_born);
	}

	PSC_DUMP(
		cerr << "process_group: live_after : ";
		dump::dump_set(sh, live);
		cerr << "\n";
	);

	update_local_interferences();

	// MOVA sources are not released here - AR is tracked separately
	for (unsigned i = 0; i < 5; ++i) {
		node *n = rt.slot(i);
		if (n && !n->is_mova()) {
			release_src_values(n);
		}
	}
}
978
979 void post_scheduler::init_globals(val_set &s, bool prealloc) {
980
981 PSC_DUMP(
982 cerr << "init_globals: ";
983 dump::dump_set(sh, s);
984 cerr << "\n";
985 );
986
987 for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
988 value *v = *I;
989 if (v->is_sgpr() && !v->is_global()) {
990 v->set_global();
991
992 if (prealloc && v->is_fixed()) {
993 v->set_prealloc();
994 }
995 }
996 }
997 }
998
// Emit the accumulated groups as a clause. If an AR value is still
// tracked, an explicit AR load group must be flushed first.
void post_scheduler::emit_clause() {

	if (alu.current_ar) {
		emit_load_ar();
		process_group();
		alu.emit_group();
	}

	alu.emit_clause(cur_bb);
}
1009
// Main bottom-up scheduling loop for one ALU clause: build groups until
// nothing is ready. When a group would exceed the clause limits, the
// register map is rolled back, the clause flushed and the group rebuilt
// in a fresh clause. A failed group with a tracked AR value forces an
// AR reload. Leftover ready/pending instructions are a scheduler bug.
void post_scheduler::schedule_alu(container_node *c) {

	assert(!ready.empty() || !ready_copies.empty());

	while (1) {

		prev_regmap = regmap;	// saved for rollback on conflicts

		if (!prepare_alu_group()) {
			if (alu.current_ar) {
				emit_load_ar();
				continue;
			} else
				break;
		}

		if (!alu.check_clause_limits()) {
			// group doesn't fit into the clause: undo its regmap
			// changes, flush, and retry in a fresh clause
			regmap = prev_regmap;
			emit_clause();
			init_globals(live, false);
			continue;
		}

		process_group();
		alu.emit_group();
	};

	if (!alu.is_empty()) {
		emit_clause();
	}

	if (!ready.empty()) {
		cerr << "##post_scheduler: unscheduled ready instructions :";
		dump::dump_op_list(&ready);
		assert(!"unscheduled ready instructions");
	}

	if (!pending.empty()) {
		cerr << "##post_scheduler: unscheduled pending instructions :";
		dump::dump_op_list(&pending);
		assert(!"unscheduled pending instructions");
	}
}
1053
// Collect into bitset rb the register sels that are unavailable for v
// on its channel, based on the fixed values in interference set vs.
void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {
	unsigned chan = v->gpr.chan();

	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh);
			I != E; ++I) {
		value *vi = *I;
		sel_chan gpr = vi->get_final_gpr();

		// only fixed gpr values on the same channel conflict; values in
		// the same chunk share a register and never interfere
		if (vi->is_any_gpr() && gpr && vi != v &&
				(!v->chunk || v->chunk != vi->chunk) &&
				vi->is_fixed() && gpr.chan() == chan) {

			unsigned r = gpr.sel();

			PSC_DUMP(
				cerr << "\tadd_interferences: " << *vi << "\n";
			);

			// grow the bitset on demand (with some headroom)
			if (rb.size() <= r)
				rb.resize(r + 32);
			rb.set(r);
		}
	}
}
1078
// Assign the chosen register (color) to a single value.
void post_scheduler::set_color_local_val(value *v, sel_chan color) {
	v->gpr = color;

	PSC_DUMP(
		cerr << " recolored: ";
		dump::dump_val(v);
		cerr << "\n";
	);
}
1088
1089 void post_scheduler::set_color_local(value *v, sel_chan color) {
1090 if (v->chunk) {
1091 vvec &vv = v->chunk->values;
1092 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
1093 value *v2 =*I;
1094 set_color_local_val(v2, color);
1095 }
1096 v->chunk->fix();
1097 } else {
1098 set_color_local_val(v, color);
1099 v->fix();
1100 }
1101 }
1102
// Pick a register for local (non-preallocated) sgpr value v on its
// current channel, avoiding everything it (or its chunk) interferes
// with. Temp gprs are preferred for non-global values; pass 1 falls
// back to the non-temp range. Asserts if no register is free.
bool post_scheduler::recolor_local(value *v) {

	sb_bitset rb;

	assert(v->is_sgpr());
	assert(!v->is_prealloc());
	assert(v->gpr);

	unsigned chan = v->gpr.chan();

	PSC_DUMP(
		cerr << "recolor_local: ";
		dump::dump_val(v);
		cerr << "   interferences: ";
		dump::dump_set(sh, v->interferences);
		cerr << "\n";
		if (v->chunk) {
			cerr << "   in chunk: ";
			coalescer::dump_chunk(v->chunk);
			cerr << "\n";
		}
	);

	// gather forbidden registers from all interferences (whole chunk
	// when v is coalesced)
	if (v->chunk) {
		for (vvec::iterator I = v->chunk->values.begin(),
				E = v->chunk->values.end(); I != E; ++I) {
			value *v2 = *I;

			PSC_DUMP( cerr << "   add_interferences for " << *v2 << " :\n"; );

			add_interferences(v, rb, v2->interferences);
		}
	} else {
		add_interferences(v, rb, v->interferences);
	}

	PSC_DUMP(
		unsigned sz = rb.size();
		cerr << "registers bits: " << sz;
		for (unsigned r = 0; r < sz; ++r) {
			if ((r & 7) == 0)
				cerr << "\n  " << r << "   ";
			cerr << (rb.get(r) ? 1 : 0);
		}
	);

	// global values must not live in the temp range
	bool no_temp_gprs = v->is_global();
	unsigned rs, re, pass = no_temp_gprs ? 1 : 0;

	while (pass < 2) {

		if (pass == 0) {
			rs = sh.first_temp_gpr();
			re = MAX_GPR;
		} else {
			rs = 0;
			re = sh.num_nontemp_gpr();
		}

		for (unsigned reg = rs; reg < re; ++reg) {
			if (reg >= rb.size() || !rb.get(reg)) {
				// color found
				set_color_local(v, sel_chan(reg, chan));
				return true;
			}
		}
		++pass;
	}

	assert(!"recolor_local failed");
	return true;
}
1175
// Discard the current (conflicting) group, restore the register map,
// and start a group containing an explicit AR load instead; clears the
// tracked AR value.
void post_scheduler::emit_load_ar() {

	regmap = prev_regmap;
	alu.discard_current_group();

	alu_group_tracker &rt = alu.grp();
	alu_node *a = alu.create_ar_load();

	// reserving into a freshly discarded group is expected to succeed
	if (!rt.try_reserve(a)) {
		cerr << "can't emit AR load : ";
		dump::dump_op(a);
		cerr << "\n";
	}

	alu.current_ar = 0;
}
1192
// Handle a scheduled instruction's definition (bottom-up): above this
// point the register is free again. Returns false when the def is the
// tracked AR value (forces an AR reload) or — fatally — when the
// register map holds an unrelated value in the same register.
bool post_scheduler::unmap_dst_val(value *d) {

	if (d == alu.current_ar) {
		emit_load_ar();
		return false;
	}

	if (d->is_prealloc()) {
		sel_chan gpr = d->get_final_gpr();
		rv_map::iterator F = regmap.find(gpr);
		value *c = NULL;
		if (F != regmap.end())
			c = F->second;

		// a different value (not chunk-coalesced with d) in d's register
		// means the map is inconsistent
		if (c && c!=d && (!c->chunk || c->chunk != d->chunk)) {
			PSC_DUMP(
				cerr << "dst value conflict : ";
				dump::dump_val(d);
				cerr << " regmap contains ";
				dump::dump_val(c);
				cerr << "\n";
			);
			assert(!"scheduler error");
			return false;
		} else if (c) {
			regmap.erase(F);	// register becomes free above this def
		}
	}
	return true;
}
1223
// Unmap the destination(s) of n from the register map. AR destinations
// clear the tracked AR value; rel (indirect) destinations unmap every
// possibly-defined value in mdef.
bool post_scheduler::unmap_dst(alu_node *n) {
	value *d = n->dst.empty() ? NULL : n->dst[0];

	if (!d)
		return true;

	if (!d->is_rel()) {
		if (d && d->is_any_reg()) {

			if (d->is_AR()) {
				if (alu.current_ar != d) {
					cerr << "loading wrong ar value\n";
					assert(0);
				} else {
					alu.current_ar = NULL;
				}

			} else if (d->is_any_gpr()) {
				if (!unmap_dst_val(d))
					return false;
			}
		}
	} else {
		for (vvec::iterator I = d->mdef.begin(), E = d->mdef.end();
				I != E; ++I) {
			d = *I;
			if (!d)
				continue;

			assert(d->is_any_gpr());

			if (!unmap_dst_val(d))
				return false;
		}
	}
	return true;
}
1261
// Record that source value v occupies its final register from here
// (bottom-up) until its def. Fails when the register already holds a
// different (non-equal) value — a conflict the caller must resolve.
bool post_scheduler::map_src_val(value *v) {

	if (!v->is_prealloc())
		return true;

	sel_chan gpr = v->get_final_gpr();
	rv_map::iterator F = regmap.find(gpr);
	value *c = NULL;
	if (F != regmap.end()) {
		c = F->second;
		if (!v->v_equal(c)) {
			PSC_DUMP(
				cerr << "can't map src value ";
				dump::dump_val(v);
				cerr << ", regmap contains ";
				dump::dump_val(c);
				cerr << "\n";
			);
			return false;
		}
	} else {
		regmap.insert(std::make_pair(gpr, v));
	}
	return true;
}
1287
// Map all fixed gpr source values in vv into the register map. Rel
// (indirect) operands recurse into their use vector and additionally
// claim the AR tracker: only one AR value may be live per clause, so a
// different pending AR value makes the mapping fail.
bool post_scheduler::map_src_vec(vvec &vv, bool src) {
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if ((!v->is_any_gpr() || !v->is_fixed()) && !v->is_rel())
			continue;

		if (v->is_rel()) {
			value *rel = v->rel;
			assert(rel);

			// const-indexed rel needs no AR tracking
			if (!rel->is_const()) {
				if (!map_src_vec(v->muse, true))
					return false;

				if (rel != alu.current_ar) {
					if (alu.current_ar) {
						PSC_DUMP(
							cerr << "  current_AR is " << *alu.current_ar
								<< "  trying to use " << *rel << "\n";
						);
						return false;
					}

					alu.current_ar = rel;

					PSC_DUMP(
						cerr << "  new current_AR assigned: " << *alu.current_ar
							<< "\n";
					);
				}
			}

		} else if (src) {
			if (!map_src_val(v)) {
				return false;
			}
		}
	}
	return true;
}
1331
1332 bool post_scheduler::map_src(alu_node *n) {
1333 if (!map_src_vec(n->dst, false))
1334 return false;
1335
1336 if (!map_src_vec(n->src, true))
1337 return false;
1338
1339 return true;
1340 }
1341
// Debug helper: print the current register map and the tracked AR/PR
// values.
void post_scheduler::dump_regmap() {

	cerr << "# REGMAP :\n";

	for(rv_map::iterator I = regmap.begin(), E = regmap.end(); I != E; ++I) {
		cerr << "  #  " << I->first << " => " << *(I->second) << "\n";
	}

	if (alu.current_ar)
		cerr << "  current_AR: " << *alu.current_ar << "\n";
	if (alu.current_pr)
		cerr << "  current_PR: " << *alu.current_pr << "\n";
}
1355
1356 void post_scheduler::recolor_locals() {
1357 alu_group_tracker &rt = alu.grp();
1358
1359 for (unsigned s = 0; s < ctx.num_slots; ++s) {
1360 alu_node *n = rt.slot(s);
1361 if (n) {
1362 value *d = n->dst[0];
1363 if (d && d->is_sgpr() && !d->is_prealloc()) {
1364 recolor_local(d);
1365 }
1366 }
1367 }
1368 }
1369
// returns true if there are interferences
// Iterate until the group's register mapping is conflict-free: unmap
// all defs, then try to map all sources; any slot whose sources fail to
// map is discarded (its op goes to alu.conflict_nodes) and the register
// map is rolled back before retrying. Returns true when the group was
// changed (slots discarded or an AR reload forced).
bool post_scheduler::check_interferences() {

	alu_group_tracker &rt = alu.grp();

	unsigned interf_slots;

	bool discarded = false;

	PSC_DUMP(
		cerr << "check_interferences: before: \n";
		dump_regmap();
	);

	do {

		interf_slots = 0;

		for (unsigned s = 0; s < ctx.num_slots; ++s) {
			alu_node *n = rt.slot(s);
			if (n) {
				// unmap_dst returns false when an AR reload was forced
				if (!unmap_dst(n)) {
					return true;
				}
			}
		}

		for (unsigned s = 0; s < ctx.num_slots; ++s) {
			alu_node *n = rt.slot(s);
			if (n) {
				if (!map_src(n)) {
					interf_slots |= (1 << s);
				}
			}
		}

		PSC_DUMP(
			for (unsigned i = 0; i < 5; ++i) {
				if (interf_slots & (1 << i)) {
					cerr << "!!!!!! interf slot: " << i << " : ";
					dump::dump_op(rt.slot(i));
					cerr << "\n";
				}
			}
		);

		if (!interf_slots)
			break;

		PSC_DUMP( cerr << "ci: discarding slots " << interf_slots << "\n"; );

		rt.discard_slots(interf_slots, alu.conflict_nodes);
		regmap = prev_regmap;
		discarded = true;

	} while(1);

	PSC_DUMP(
		cerr << "check_interferences: after: \n";
		dump_regmap();
	);

	return discarded;
}
1434
// add instruction(s) (alu_node or contents of alu_packed_node) to current group
// returns the number of added instructions on success
unsigned post_scheduler::try_add_instruction(node *n) {

	alu_group_tracker &rt = alu.grp();

	unsigned avail_slots = rt.avail_slots();

	if (n->is_alu_packed()) {
		// multislot (packed) instruction: all of its slots must be free
		// and reserved atomically
		alu_packed_node *p = static_cast<alu_packed_node*>(n);
		unsigned slots = p->get_slot_mask();
		unsigned cnt = __builtin_popcount(slots);

		if ((slots & avail_slots) != slots) {
			PSC_DUMP( cerr << "   no slots \n"; );
			return 0;
		}

		p->update_packed_items(ctx);

		if (!rt.try_reserve(p)) {
			PSC_DUMP( cerr << "   reservation failed \n"; );
			return 0;
		}

		p->remove();
		return cnt;

	} else {
		alu_node *a = static_cast<alu_node*>(n);
		value *d = a->dst.empty() ? NULL : a->dst[0];

		// MOVA writes a special register, not a gpr - the destination
		// doesn't constrain slot selection in that case
		if (d && d->is_special_reg()) {
			assert(a->bc.op_ptr->flags & AF_MOVA);
			d = NULL;
		}

		unsigned allowed_slots = ctx.alu_slots_mask(a->bc.op_ptr);
		unsigned slot;

		allowed_slots &= avail_slots;

		if (!allowed_slots)
			return 0;

		if (d) {
			// vector slot must match the destination channel; trans
			// (bit 0x10) can write any channel
			slot = d->get_final_chan();
			a->bc.dst_chan = slot;
			allowed_slots &= (1 << slot) | 0x10;
		} else {
			if (a->bc.op_ptr->flags & AF_MOVA) {
				if (a->bc.slot_flags & AF_V)
					allowed_slots &= (1 << SLOT_X);
				else
					allowed_slots &= (1 << SLOT_TRANS);
			}
		}

		// FIXME workaround for some problems with MULADD in trans slot on r700,
		// (is it really needed on r600?)
		if (a->bc.op == ALU_OP3_MULADD && !ctx.is_egcm()) {
			allowed_slots &= 0x0F;
		}

		if (!allowed_slots) {
			PSC_DUMP( cerr << "   no suitable slots\n"; );
			return 0;
		}

		// pick the lowest-numbered allowed slot
		slot = __builtin_ctz(allowed_slots);
		a->bc.slot = slot;

		PSC_DUMP( cerr << "slot: " << slot << "\n"; );

		if (!rt.try_reserve(a)) {
			PSC_DUMP( cerr << "  reservation failed\n"; );
			return 0;
		}

		a->remove();
		return 1;
	}
}
1518
// Try to coalesce a gpr-to-gpr copy MOV: if the source and destination
// were colored to the same gpr the copy is a no-op and is dropped.
// Returns true when the node was handled here (coalesced now, or deferred
// because it isn't ready yet), false when it must be scheduled as a
// regular instruction.
bool post_scheduler::check_copy(node *n) {
	if (!n->is_copy_mov())
		return false;

	value *s = n->src[0];
	value *d = n->dst[0];

	// only gpr-to-gpr copies are candidates for coalescing
	if (!s->is_sgpr() || !d->is_sgpr())
		return false;

	if (!s->is_prealloc()) {
		recolor_local(s);
	}

	if (s->gpr == d->gpr) {

		PSC_DUMP(
			cerr << "check_copy: ";
			dump::dump_op(n);
			cerr << "\n";
		);

		rv_map::iterator F = regmap.find(d->gpr);
		bool gpr_free = (F == regmap.end());

		if (d->is_prealloc()) {
			// the destination location must currently hold the destination
			// value (or a value from the same chunk); otherwise the copy
			// can't be eliminated yet - keep it queued
			if (gpr_free) {
				PSC_DUMP( cerr << "    copy not ready...\n";);
				return true;
			}

			value *rv = F->second;
			if (rv != d && (!rv->chunk || rv->chunk != d->chunk)) {
				PSC_DUMP( cerr << "    copy not ready(2)...\n";);
				return true;
			}

			unmap_dst(static_cast<alu_node*>(n));
		}

		if (s->is_prealloc() && !map_src_val(s))
			return true;

		// src and dst share the gpr - drop the copy entirely
		update_live(n, NULL);

		release_src_values(n);
		n->remove();
		PSC_DUMP( cerr << "    copy coalesced...\n";);
		return true;
	}
	return false;
}
1571
// Debug helper: print one line per occupied slot of the given group.
void post_scheduler::dump_group(alu_group_tracker &rt) {
	for (unsigned i = 0; i < 5; ++i) {
		node *n = rt.slot(i);
		if (n) {
			cerr << "slot " << i << " : ";
			dump::dump_op(n);
			cerr << "\n";
		}
	}
}
1582
// Process queued copy MOVs: run check_copy over the ready_copies list
// repeatedly until the list stops changing (coalescing one copy may make
// another one ready). Copies that check_copy doesn't handle are moved to
// the regular ready list for normal scheduling.
void post_scheduler::process_ready_copies() {

	node *last;

	do {
		last = ready_copies.back();

		// iterator N is advanced before the body so that check_copy may
		// remove the current node from the list
		for (node_iterator N, I = ready_copies.begin(), E = ready_copies.end();
				I != E; I = N) {
			N = I; ++N;

			node *n = *I;

			if (!check_copy(n)) {
				n->remove();
				ready.push_back(n);
			}
		}
	} while (last != ready_copies.back());

	update_local_interferences();
}
1605
1606
// Build one alu group from the ready/ready_copies lists, retrying after
// interference-driven slot discards. Returns true (nonzero inst_count)
// if at least one instruction was placed into the group.
bool post_scheduler::prepare_alu_group() {

	alu_group_tracker &rt = alu.grp();

	// retry counter, used to bound the rescheduling loop below
	unsigned i1 = 0;

	PSC_DUMP(
		cerr << "prepare_alu_group: starting...\n";
		dump_group(rt);
	);

	// nodes discarded during previous attempts become candidates again
	ready.append_from(&alu.conflict_nodes);

	// FIXME rework this loop

	do {

		// first try to coalesce / requeue pending copy MOVs
		process_ready_copies();

		++i1;

		// N is advanced before the body: try_add_instruction removes the
		// node from the ready list on success
		for (node_iterator N, I = ready.begin(), E = ready.end(); I != E;
				I = N) {
			N = I; ++N;
			node *n = *I;

			PSC_DUMP(
				cerr << "p_a_g: ";
				dump::dump_op(n);
				cerr << "\n";
			);


			unsigned cnt = try_add_instruction(n);

			if (!cnt)
				continue;

			PSC_DUMP(
				cerr << "current group:\n";
				dump_group(rt);
			);

			if (rt.inst_count() == ctx.num_slots) {
				PSC_DUMP( cerr << " all slots used\n"; );
				break;
			}
		}

		// no conflicting slots discarded - the group is consistent, done
		if (!check_interferences())
			break;

		// don't try to add more instructions to the group with mova if this
		// can lead to breaking clause slot count limit - we don't want mova to
		// end up in the end of the new clause instead of beginning of the
		// current clause.
		if (rt.has_ar_load() && alu.total_slots() > 121)
			break;

		// give up growing the group after enough retries, provided it
		// already contains something
		if (rt.inst_count() && i1 > 50)
			break;

		// restore the register map before the next attempt
		regmap = prev_regmap;

	} while (1);

	PSC_DUMP(
		cerr << " prepare_alu_group done, " << rt.inst_count()
			<< " slot(s) \n";

		cerr << "$$$$$$$$PAG i1=" << i1
				<< " ready " << ready.count()
				<< " pending " << pending.count()
				<< " conflicting " << alu.conflict_nodes.count()
				<<"\n";

	);

	return rt.inst_count();
}
1687
// Decrement use counts for the values read by n, releasing their defining
// instructions when the count drops to zero. The dst vector is scanned as
// well (with src == false) because relative destinations also reference
// values - see release_src_vec.
void post_scheduler::release_src_values(node* n) {
	release_src_vec(n->src, true);
	release_src_vec(n->dst, false);
}
1692
1693 void post_scheduler::release_op(node *n) {
1694 PSC_DUMP(
1695 cerr << "release_op ";
1696 dump::dump_op(n);
1697 cerr << "\n";
1698 );
1699
1700 n->remove();
1701
1702 if (n->is_copy_mov()) {
1703 ready_copies.push_back(n);
1704 } else if (n->is_mova() || n->is_pred_set()) {
1705 ready.push_front(n);
1706 } else {
1707 ready.push_back(n);
1708 }
1709 }
1710
1711 void post_scheduler::release_src_val(value *v) {
1712 node *d = v->any_def();
1713 if (d) {
1714 if (!--ucm[d])
1715 release_op(d);
1716 }
1717 }
1718
1719 void post_scheduler::release_src_vec(vvec& vv, bool src) {
1720
1721 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
1722 value *v = *I;
1723 if (!v || v->is_readonly())
1724 continue;
1725
1726 if (v->is_rel()) {
1727 release_src_val(v->rel);
1728 release_src_vec(v->muse, true);
1729
1730 } else if (src) {
1731 release_src_val(v);
1732 }
1733 }
1734 }
1735
// Clear the tracked literal values (lt) and their use counts (uc).
void literal_tracker::reset() {
	memset(lt, 0, sizeof(lt));
	memset(uc, 0, sizeof(uc));
}
1740
// Clear the gpr read-port reservations (rp) and their use counts (uc).
void rp_gpr_tracker::reset() {
	memset(rp, 0, sizeof(rp));
	memset(uc, 0, sizeof(uc));
}
1745
// Clear the reserved kcache const sels (rp) and their use counts (uc).
void rp_kcache_tracker::reset() {
	memset(rp, 0, sizeof(rp));
	memset(uc, 0, sizeof(uc));
}
1750
// Clear the kcache set descriptors (kc) and the set of locked lines.
void alu_kcache_tracker::reset() {
	memset(kc, 0, sizeof(kc));
	lines.clear();
}
1755
// Reset per-clause group state: current group index, slot count and both
// group trackers. Note: 'clause' and the kcache tracker are reset in
// emit_clause, not here.
void alu_clause_tracker::reset() {
	group = 0;
	slot_count = 0;
	grp0.reset();
	grp1.reset();
}
1762
// Clause construction state: two group trackers (grp0/grp1) used
// alternately via the 'group' index (see new_group), the kcache tracker
// (kt), the accumulated slot count, and the live AR/PR values which would
// have to be reloaded in a new clause.
alu_clause_tracker::alu_clause_tracker(shader &sh)
	: sh(sh), kt(sh.get_ctx().hw_class), slot_count(),
	  grp0(sh), grp1(sh),
	  group(), clause(),
	  push_exec_mask(),
	  current_ar(), current_pr() {}
1769
// Emit the current group into the clause. Groups are scheduled bottom-up,
// so the emitted group node is prepended (push_front) to keep program
// order. Also records whether any group updates the exec mask (the clause
// op is adjusted in emit_clause) and switches to the other group tracker.
void alu_clause_tracker::emit_group() {

	assert(grp().inst_count());

	alu_group_node *g = grp().emit();

	if (grp().has_update_exec_mask()) {
		// at most one exec-mask update per clause is expected
		assert(!push_exec_mask);
		push_exec_mask = true;
	}

	assert(g);

	// clause node is created lazily on first emitted group
	if (!clause) {
		clause = sh.create_clause(NST_ALU_CLAUSE);
	}

	clause->push_front(g);

	slot_count += grp().slot_count();

	new_group();

	PSC_DUMP( cerr << "   #### group emitted\n"; );
}
1795
// Finalize the current clause and prepend it to container c (clauses, like
// groups, are built in reverse order). Any live AR/PR loads must already
// have been emitted (asserts). Writes the collected kcache setup into the
// clause bytecode and switches the clause op to ALU_PUSH_BEFORE if a group
// updated the exec mask.
void alu_clause_tracker::emit_clause(container_node *c) {
	assert(clause);

	kt.init_clause(clause->bc);

	assert(!current_ar);
	assert(!current_pr);

	if (push_exec_mask)
		clause->bc.set_op(CF_OP_ALU_PUSH_BEFORE);

	c->push_front(clause);

	// reset per-clause state for the next clause
	clause = NULL;
	push_exec_mask = false;
	slot_count = 0;
	kt.reset();

	PSC_DUMP( cerr << "######### ALU clause emitted\n"; );
}
1816
1817 bool alu_clause_tracker::check_clause_limits() {
1818
1819 alu_group_tracker &gt = grp();
1820
1821 unsigned slots = gt.slot_count();
1822
1823 // reserving slots to load AR and PR values
1824 unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 1 : 0);
1825
1826 if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots)
1827 return false;
1828
1829 if (!kt.try_reserve(gt))
1830 return false;
1831
1832 return true;
1833 }
1834
// Start a fresh group: toggle the group index between the two trackers
// (grp0/grp1) and reset the one that becomes current.
void alu_clause_tracker::new_group() {
	group = !group;
	grp().reset();
}
1839
1840 bool alu_clause_tracker::is_empty() {
1841 return clause == NULL;
1842 }
1843
1844 void literal_tracker::init_group_literals(alu_group_node* g) {
1845
1846 g->literals.clear();
1847 for (unsigned i = 0; i < 4; ++i) {
1848 if (!lt[i])
1849 break;
1850
1851 g->literals.push_back(lt[i]);
1852
1853 PSC_DUMP(
1854 cerr << "literal emitted: " << lt[i].f
1855 << " 0x" << std::hex << lt[i].u
1856 << std::dec << " " << lt[i].i << "\n";
1857 );
1858 }
1859 }
1860
// Try to reserve the kcache lines required by the group's constants for
// the current clause. Succeeds immediately when the group uses no
// constants or adds no new lines; otherwise attempts to repack the
// extended line set into the available kcache sets (update_kc), rolling
// the line set back on failure.
bool alu_kcache_tracker::try_reserve(alu_group_tracker& gt) {
	rp_kcache_tracker &kt = gt.kcache();

	if (!kt.num_sels())
		return true;

	sb_set<unsigned> group_lines;

	unsigned nl = kt.get_lines(group_lines);
	assert(nl);

	// keep a copy of the current line set so we can roll back
	sb_set<unsigned> clause_lines(lines);
	lines.add_set(group_lines);

	// no new lines - current kcache setup already covers the group
	if (clause_lines.size() == lines.size())
		return true;

	if (update_kc())
		return true;

	// new lines don't fit into the kcache sets - restore previous state
	lines = clause_lines;

	return false;
}
1885
// Collect the kcache line indices for the reserved const sels into
// 'lines'; returns the number of lines newly inserted. The shift depends
// on the chip class: sel_count == 2 means r6xx (see the constructor note
// at the top of this file), which uses a different line granularity than
// later chips. NOTE(review): the bank bits appear to be kept in the upper
// part of the line index (update_kc extracts them with line >> 8) -
// confirm against the ISA docs.
unsigned rp_kcache_tracker::get_lines(kc_lines& lines) {
	unsigned cnt = 0;

	for (unsigned i = 0; i < sel_count; ++i) {
		unsigned line = rp[i];

		// rp entries are filled from index 0; the first zero entry means
		// there are no more reserved sels
		if (!line)
			return cnt;

		// stored values are biased by 1 so that 0 can mean 'unused'
		--line;
		line = (sel_count == 2) ? line >> 5 : line >> 6;

		if (lines.insert(line).second)
			++cnt;
	}
	return cnt;
}
1903
// Repack the required line set into at most max_kcs kcache set
// descriptors (bank/addr/mode). A line directly following the previous
// descriptor's line in the same bank widens that descriptor's lock mode
// instead of consuming a new set (this relies on 'lines' iterating in
// increasing order). On overflow the previous descriptors are restored
// and false is returned.
bool alu_kcache_tracker::update_kc() {
	unsigned c = 0;

	// snapshot for rollback
	bc_kcache old_kc[4];
	memcpy(old_kc, kc, sizeof(kc));

	for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; ++I) {
		unsigned line = *I;
		unsigned bank = line >> 8;

		line &= 0xFF;

		if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line))
			++kc[c-1].mode;
		else {
			if (c == max_kcs) {
				// out of kcache sets - undo and report failure
				memcpy(kc, old_kc, sizeof(kc));
				return false;
			}

			kc[c].mode = KC_LOCK_1;

			kc[c].bank = bank;
			kc[c].addr = line;
			++c;
		}
	}
	return true;
}
1933
// Create the MOVA instruction that loads the tracked current_ar value into
// the AR register. Chips with uses_mova_gpr set use MOVA_GPR_INT in the
// trans slot; others use MOVA_INT in slot X.
alu_node* alu_clause_tracker::create_ar_load() {
	alu_node *a = sh.create_alu();

	// FIXME use MOVA_GPR on R6xx

	if (sh.get_ctx().uses_mova_gpr) {
		a->bc.set_op(ALU_OP1_MOVA_GPR_INT);
		a->bc.slot = SLOT_TRANS;
	} else {
		a->bc.set_op(ALU_OP1_MOVA_INT);
		a->bc.slot = SLOT_X;
	}

	// dst[0] stays empty - MOVA writes the special AR register
	a->dst.resize(1);
	a->src.push_back(current_ar);

	PSC_DUMP(
		cerr << "created AR load: ";
		dump::dump_op(a);
		cerr << "\n";
	);

	return a;
}
1958
// Drop the partially built group: move all of its instructions to the
// conflict_nodes list so they can be rescheduled later.
void alu_clause_tracker::discard_current_group() {
	PSC_DUMP( cerr << "act::discard_current_group\n"; );
	grp().discard_all_slots(conflict_nodes);
}
1963
// Debug helper: for each of the 3 gpr read cycles print the
// <reserved gpr>:<use count> pair of every channel.
void rp_gpr_tracker::dump() {
	cerr << "=== gpr_tracker dump:\n";
	for (int c = 0; c < 3; ++c) {
		cerr << "cycle " << c << "      ";
		for (int h = 0; h < 4; ++h) {
			cerr << rp[c][h] << ":" << uc[c][h] << "   ";
		}
		cerr << "\n";
	}
}
1974
1975 } // namespace r600_sb