nv50/ir: import new shader backend code
[mesa.git] / src / gallium / drivers / nv50 / codegen / nv50_ir_ra.cpp
1
2 #include "nv50_ir.h"
3 #include "nv50_ir_target.h"
4
5 #include "nv50/nv50_debug.h"
6
7 namespace nv50_ir {
8
9 #define MAX_REGISTER_FILE_SIZE 256
10
11 class RegisterSet
12 {
13 public:
14 RegisterSet();
15 RegisterSet(const Target *);
16
17 void init(const Target *);
18 void reset(); // reset allocation status, but not max assigned regs
19
20 void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
21 void intersect(DataFile f, const RegisterSet *);
22
23 bool assign(Value **, int nr);
24 void release(const Value *);
25 void occupy(const Value *);
26
27 int getMaxAssigned(DataFile f) const { return fill[f]; }
28
29 void print() const;
30
31 private:
32 uint32_t bits[FILE_ADDRESS + 1][(MAX_REGISTER_FILE_SIZE + 31) / 32];
33
34 int unit[FILE_ADDRESS + 1]; // log2 of allocation granularity
35
36 int last[FILE_ADDRESS + 1];
37 int fill[FILE_ADDRESS + 1];
38 };
39
40 void
41 RegisterSet::reset()
42 {
43 memset(bits, 0, sizeof(bits));
44 }
45
46 RegisterSet::RegisterSet()
47 {
48 reset();
49 }
50
51 void
52 RegisterSet::init(const Target *targ)
53 {
54 for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
55 DataFile f = static_cast<DataFile>(rf);
56 last[rf] = targ->getFileSize(f) - 1;
57 unit[rf] = targ->getFileUnit(f);
58 fill[rf] = -1;
59 assert(last[rf] < MAX_REGISTER_FILE_SIZE);
60 }
61 }
62
63 RegisterSet::RegisterSet(const Target *targ)
64 {
65 reset();
66 init(targ);
67 }
68
69 void
70 RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
71 {
72 for (int i = 0; i < (last[f] + 31) / 32; ++i)
73 bits[f][i] = (bits[f][i] | lock) & ~unlock;
74 }
75
76 void
77 RegisterSet::intersect(DataFile f, const RegisterSet *set)
78 {
79 for (int i = 0; i < (last[f] + 31) / 32; ++i)
80 bits[f][i] |= set->bits[f][i];
81 }
82
83 void
84 RegisterSet::print() const
85 {
86 INFO("GPR:");
87 for (int i = 0; i < (last[FILE_GPR] + 31) / 32; ++i)
88 INFO(" %08x", bits[FILE_GPR][i]);
89 INFO("\n");
90 }
91
92 bool
93 RegisterSet::assign(Value **def, int nr)
94 {
95 DataFile f = def[0]->reg.file;
96 int n = nr;
97 if (n == 3)
98 n = 4;
99 int s = (n * def[0]->reg.size) >> unit[f];
100 uint32_t m = (1 << s) - 1;
101
102 int id = last[f] + 1;
103 int i;
104
105 for (i = 0; (i * 32) < last[f]; ++i) {
106 if (bits[f][i] == 0xffffffff)
107 continue;
108
109 for (id = 0; id < 32; id += s)
110 if (!(bits[f][i] & (m << id)))
111 break;
112 if (id < 32)
113 break;
114 }
115 id += i * 32;
116 if (id > last[f])
117 return false;
118
119 bits[f][id / 32] |= m << (id % 32);
120
121 if (id + (s - 1) > fill[f])
122 fill[f] = id + (s - 1);
123
124 for (i = 0; i < nr; ++i, ++id)
125 if (!def[i]->livei.isEmpty()) // XXX: really increased id if empty ?
126 def[i]->reg.data.id = id;
127 return true;
128 }
129
130 void
131 RegisterSet::occupy(const Value *val)
132 {
133 int id = val->reg.data.id;
134 if (id < 0)
135 return;
136 unsigned int f = val->reg.file;
137
138 uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
139
140 INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %x\n", f, id, m);
141
142 bits[f][id / 32] |= m << (id % 32);
143
144 if (fill[f] < id)
145 fill[f] = id;
146 }
147
148 void
149 RegisterSet::release(const Value *val)
150 {
151 int id = val->reg.data.id;
152 if (id < 0)
153 return;
154 unsigned int f = val->reg.file;
155
156 uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
157
158 INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %x\n", f, id, m);
159
160 bits[f][id / 32] &= ~(m << (id % 32));
161 }
162
163 #define JOIN_MASK_PHI (1 << 0)
164 #define JOIN_MASK_UNION (1 << 1)
165 #define JOIN_MASK_MOV (1 << 2)
166 #define JOIN_MASK_TEX (1 << 3)
167 #define JOIN_MASK_CONSTRAINT (1 << 4)
168
169 class RegAlloc
170 {
171 public:
172 RegAlloc(Program *program) : prog(program), sequence(0) { }
173
174 bool exec();
175 bool execFunc();
176
177 private:
178 bool coalesceValues(unsigned int mask);
179 bool linearScan();
180 bool allocateConstrainedValues();
181
182 private:
183 class PhiMovesPass : public Pass {
184 private:
185 virtual bool visit(BasicBlock *);
186 inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
187 };
188
189 class BuildIntervalsPass : public Pass {
190 private:
191 virtual bool visit(BasicBlock *);
192 void collectLiveValues(BasicBlock *);
193 void addLiveRange(Value *, const BasicBlock *, int end);
194 };
195
196 class InsertConstraintsPass : public Pass {
197 public:
198 bool exec(Function *func);
199 private:
200 virtual bool visit(BasicBlock *);
201
202 bool insertConstraintMoves();
203
204 void addHazard(Instruction *i, const ValueRef *src);
205 void textureMask(TexInstruction *);
206 void addConstraint(Instruction *, int s, int n);
207 bool detectConflict(Instruction *, int s);
208
209 DLList constrList;
210 };
211
212 bool buildLiveSets(BasicBlock *);
213 void collectLValues(DLList&, bool assignedOnly);
214
215 void insertOrderedTail(DLList&, Value *);
216 inline Instruction *insnBySerial(int);
217
218 private:
219 Program *prog;
220 Function *func;
221
222 // instructions in control flow / chronological order
223 ArrayList insns;
224
225 int sequence; // for manual passes through CFG
226 };
227
228 Instruction *
229 RegAlloc::insnBySerial(int serial)
230 {
231 return reinterpret_cast<Instruction *>(insns.get(serial));
232 }
233
234 void
235 RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
236 const BasicBlock *bb,
237 int end)
238 {
239 Instruction *insn = val->getUniqueInsn();
240
241 if (!insn)
242 return;
243 assert(bb->getFirst()->serial <= bb->getExit()->serial);
244 assert(bb->getExit()->serial + 1 >= end);
245
246 int begin = insn->serial;
247 if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
248 begin = bb->getEntry()->serial;
249
250 INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
251 val->id, begin, insn->serial, end);
252
253 if (begin != end) // empty ranges are only added as hazards for fixed regs
254 val->livei.extend(begin, end);
255 }
256
257 bool
258 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
259 {
260 if (b->cfg.incidentCount() <= 1)
261 return false;
262
263 int n = 0;
264 for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
265 if (ei.getType() == Graph::Edge::TREE ||
266 ei.getType() == Graph::Edge::FORWARD)
267 ++n;
268 return (n == 2);
269 }
270
271 // For each operand of each PHI in b, generate a new value by inserting a MOV
272 // at the end of the block it is coming from and replace the operand with its
273 // result. This eliminates liveness conflicts and enables us to let values be
274 // copied to the right register if such a conflict exists nonetheless.
275 //
276 // These MOVs are also crucial in making sure the live intervals of phi srces
277 // are extended until the end of the loop, since they are not included in the
278 // live-in sets.
279 bool
280 RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
281 {
282 Instruction *phi, *mov;
283 BasicBlock *pb, *pn;
284
285 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
286 pb = pn = BasicBlock::get(ei.getNode());
287 assert(pb);
288
289 if (needNewElseBlock(bb, pb)) {
290 pn = new BasicBlock(func);
291
292 // deletes an edge, iterator is invalid after this:
293 pb->cfg.detach(&bb->cfg);
294 pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
295 pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); // XXX: check order !
296
297 assert(pb->getExit()->op != OP_CALL);
298 if (pb->getExit()->asFlow()->target.bb == bb)
299 pb->getExit()->asFlow()->target.bb = pn;
300 break;
301 }
302 }
303
304 // insert MOVs (phi->src[j] should stem from j-th in-BB)
305 int j = 0;
306 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
307 pb = BasicBlock::get(ei.getNode());
308 if (!pb->isTerminated())
309 pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
310
311 for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
312 mov = new_Instruction(func, OP_MOV, TYPE_U32);
313
314 mov->setSrc(0, phi->getSrc(j));
315 mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
316 phi->setSrc(j, mov->getDef(0));
317
318 pb->insertBefore(pb->getExit(), mov);
319 }
320 ++j;
321 }
322
323 return true;
324 }
325
326 // Build the set of live-in variables of bb.
327 bool
328 RegAlloc::buildLiveSets(BasicBlock *bb)
329 {
330 BasicBlock *bn;
331 Instruction *i;
332 unsigned int s, d;
333
334 INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());
335
336 bb->liveSet.allocate(func->allLValues.getSize(), false);
337
338 int n = 0;
339 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
340 bn = BasicBlock::get(ei.getNode());
341 if (bn == bb)
342 continue;
343 if (bn->cfg.visit(sequence))
344 if (!buildLiveSets(bn))
345 return false;
346 if (n++ == 0)
347 bb->liveSet = bn->liveSet;
348 else
349 bb->liveSet |= bn->liveSet;
350 }
351 if (!n && !bb->liveSet.marker)
352 bb->liveSet.fill(0);
353 bb->liveSet.marker = true;
354
355 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
356 INFO("BB:%i live set of out blocks:\n", bb->getId());
357 bb->liveSet.print();
358 }
359
360 // if (!bb->getEntry())
361 // return true;
362
363 for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
364 for (d = 0; i->defExists(d); ++d)
365 bb->liveSet.clr(i->getDef(d)->id);
366 for (s = 0; i->srcExists(s); ++s)
367 if (i->getSrc(s)->asLValue())
368 bb->liveSet.set(i->getSrc(s)->id);
369 }
370 for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
371 bb->liveSet.clr(i->getDef(0)->id);
372
373 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
374 INFO("BB:%i live set after propagation:\n", bb->getId());
375 bb->liveSet.print();
376 }
377
378 return true;
379 }
380
381 void
382 RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
383 {
384 BasicBlock *bbA = NULL, *bbB = NULL;
385
386 assert(bb->cfg.incidentCount() || bb->liveSet.popCount() == 0);
387
388 if (bb->cfg.outgoingCount()) {
389 // trickery to save a loop of OR'ing liveSets
390 // aliasing works fine with BitSet::setOr
391 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
392 if (ei.getType() == Graph::Edge::DUMMY)
393 continue;
394 if (bbA) {
395 bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
396 bbA = bb;
397 } else {
398 bbA = bbB;
399 }
400 bbB = BasicBlock::get(ei.getNode());
401 }
402 bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
403 } else
404 if (bb->cfg.incidentCount()) {
405 bb->liveSet.fill(0);
406 }
407 }
408
409 bool
410 RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
411 {
412 collectLiveValues(bb);
413
414 INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());
415
416 // go through out blocks and delete phi sources that do not originate from
417 // the current block from the live set
418 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
419 BasicBlock *out = BasicBlock::get(ei.getNode());
420
421 for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
422 bb->liveSet.clr(i->getDef(0)->id);
423
424 for (int s = 0; s < NV50_IR_MAX_SRCS && i->src[s].exists(); ++s) {
425 assert(i->src[s].getInsn());
426 if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
427 bb->liveSet.set(i->getSrc(s)->id);
428 else
429 bb->liveSet.clr(i->getSrc(s)->id);
430 }
431 }
432 }
433
434 // remaining live-outs are live until end
435 if (bb->getExit()) {
436 for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
437 if (bb->liveSet.test(j))
438 addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
439 }
440
441 for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
442 for (int d = 0; i->defExists(d); ++d) {
443 bb->liveSet.clr(i->getDef(d)->id);
444 if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
445 i->getDef(d)->livei.extend(i->serial, i->serial);
446 }
447
448 for (int s = 0; i->srcExists(s); ++s) {
449 if (!i->getSrc(s)->asLValue())
450 continue;
451 if (!bb->liveSet.test(i->getSrc(s)->id)) {
452 bb->liveSet.set(i->getSrc(s)->id);
453 addLiveRange(i->getSrc(s), bb, i->serial);
454 }
455 }
456 }
457
458 return true;
459 }
460
461 bool
462 RegAlloc::coalesceValues(unsigned int mask)
463 {
464 int c, n;
465
466 for (n = 0; n < insns.getSize(); ++n) {
467 Instruction *i;
468 Instruction *insn = insnBySerial(n);
469
470 switch (insn->op) {
471 case OP_PHI:
472 if (!(mask & JOIN_MASK_PHI))
473 break;
474 for (c = 0; insn->srcExists(c); ++c)
475 if (!insn->getDef(0)->coalesce(insn->getSrc(c), false)) {
476 ERROR("failed to coalesce phi operands\n");
477 return false;
478 }
479 break;
480 case OP_UNION:
481 if (!(mask & JOIN_MASK_UNION))
482 break;
483 for (c = 0; insn->srcExists(c); ++c)
484 insn->getDef(0)->coalesce(insn->getSrc(c), true);
485 break;
486 case OP_CONSTRAINT:
487 if (!(mask & JOIN_MASK_CONSTRAINT))
488 break;
489 for (c = 0; c < 4 && insn->srcExists(c); ++c)
490 insn->getDef(c)->coalesce(insn->getSrc(c), true);
491 break;
492 case OP_MOV:
493 if (!(mask & JOIN_MASK_MOV))
494 break;
495 i = insn->getSrc(0)->getUniqueInsn();
496 if (i && !i->constrainedDefs())
497 insn->getDef(0)->coalesce(insn->getSrc(0), false);
498 break;
499 case OP_TEX:
500 case OP_TXB:
501 case OP_TXL:
502 case OP_TXF:
503 case OP_TXQ:
504 case OP_TXD:
505 case OP_TXG:
506 case OP_TEXCSAA:
507 if (!(mask & JOIN_MASK_TEX))
508 break;
509 for (c = 0; c < 4 && insn->srcExists(c); ++c)
510 insn->getDef(c)->coalesce(insn->getSrc(c), true);
511 break;
512 default:
513 break;
514 }
515 }
516 return true;
517 }
518
519 void
520 RegAlloc::insertOrderedTail(DLList &list, Value *val)
521 {
522 // we insert the live intervals in order, so this should be short
523 DLList::Iterator iter = list.revIterator();
524 const int begin = val->livei.begin();
525 for (; !iter.end(); iter.next()) {
526 if (reinterpret_cast<Value *>(iter.get())->livei.begin() <= begin)
527 break;
528 }
529 iter.insert(val);
530 }
531
532 static void
533 checkList(DLList &list)
534 {
535 Value *prev = NULL;
536 Value *next = NULL;
537
538 for (DLList::Iterator iter = list.iterator(); !iter.end(); iter.next()) {
539 next = Value::get(iter);
540 assert(next);
541 if (prev) {
542 assert(prev->livei.begin() <= next->livei.begin());
543 }
544 assert(next->join == next);
545 prev = next;
546 }
547 }
548
549 void
550 RegAlloc::collectLValues(DLList &list, bool assignedOnly)
551 {
552 for (int n = 0; n < insns.getSize(); ++n) {
553 Instruction *i = insnBySerial(n);
554
555 for (int d = 0; i->defExists(d); ++d)
556 if (!i->getDef(d)->livei.isEmpty())
557 if (!assignedOnly || i->getDef(d)->reg.data.id >= 0)
558 insertOrderedTail(list, i->getDef(d));
559 }
560 checkList(list);
561 }
562
563 bool
564 RegAlloc::allocateConstrainedValues()
565 {
566 Value *defs[4];
567 RegisterSet regSet[4];
568 DLList regVals;
569
570 INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: allocating constrained values\n");
571
572 collectLValues(regVals, true);
573
574 for (int c = 0; c < 4; ++c)
575 regSet[c].init(prog->getTarget());
576
577 for (int n = 0; n < insns.getSize(); ++n) {
578 Instruction *i = insnBySerial(n);
579
580 const int vecSize = i->defCount(0xf);
581 if (vecSize < 2)
582 continue;
583 assert(vecSize <= 4);
584
585 for (int c = 0; c < vecSize; ++c)
586 defs[c] = i->def[c].rep();
587
588 if (defs[0]->reg.data.id >= 0) {
589 for (int c = 1; c < vecSize; ++c) {
590 assert(defs[c]->reg.data.id >= 0);
591 }
592 continue;
593 }
594
595 for (int c = 0; c < vecSize; ++c) {
596 uint32_t mask;
597 regSet[c].reset();
598
599 for (DLList::Iterator it = regVals.iterator(); !it.end(); it.next()) {
600 Value *rVal = Value::get(it);
601 if (rVal->reg.data.id >= 0 && rVal->livei.overlaps(defs[c]->livei))
602 regSet[c].occupy(rVal);
603 }
604 mask = 0x11111111;
605 if (vecSize == 2) // granularity is 2 instead of 4
606 mask |= 0x11111111 << 2;
607 regSet[c].periodicMask(defs[0]->reg.file, 0, ~(mask << c));
608
609 if (!defs[c]->livei.isEmpty())
610 insertOrderedTail(regVals, defs[c]);
611 }
612 for (int c = 1; c < vecSize; ++c)
613 regSet[0].intersect(defs[0]->reg.file, &regSet[c]);
614
615 if (!regSet[0].assign(&defs[0], vecSize)) // TODO: spilling
616 return false;
617 }
618 for (int c = 0; c < 4; c += 2)
619 if (regSet[c].getMaxAssigned(FILE_GPR) > prog->maxGPR)
620 prog->maxGPR = regSet[c].getMaxAssigned(FILE_GPR);
621 return true;
622 }
623
624 bool
625 RegAlloc::linearScan()
626 {
627 Value *cur, *val;
628 DLList unhandled, active, inactive;
629 RegisterSet f(prog->getTarget()), free(prog->getTarget());
630
631 INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: linear scan\n");
632
633 collectLValues(unhandled, false);
634
635 for (DLList::Iterator cI = unhandled.iterator(); !cI.end();) {
636 cur = Value::get(cI);
637 cI.erase();
638
639 for (DLList::Iterator aI = active.iterator(); !aI.end();) {
640 val = Value::get(aI);
641 if (val->livei.end() <= cur->livei.begin()) {
642 free.release(val);
643 aI.erase();
644 } else
645 if (!val->livei.contains(cur->livei.begin())) {
646 free.release(val);
647 aI.moveToList(inactive);
648 } else {
649 aI.next();
650 }
651 }
652
653 for (DLList::Iterator iI = inactive.iterator(); !iI.end();) {
654 val = Value::get(iI);
655 if (val->livei.end() <= cur->livei.begin()) {
656 iI.erase();
657 } else
658 if (val->livei.contains(cur->livei.begin())) {
659 free.occupy(val);
660 iI.moveToList(active);
661 } else {
662 iI.next();
663 }
664 }
665 f = free;
666
667 for (DLList::Iterator iI = inactive.iterator(); !iI.end(); iI.next()) {
668 val = Value::get(iI);
669 if (val->livei.overlaps(cur->livei))
670 f.occupy(val);
671 }
672
673 for (DLList::Iterator uI = unhandled.iterator(); !uI.end(); uI.next()) {
674 val = Value::get(uI);
675 if (val->reg.data.id >= 0 && val->livei.overlaps(cur->livei))
676 f.occupy(val);
677 }
678
679 if (cur->reg.data.id < 0) {
680 bool spill = !f.assign(&cur, 1);
681 if (spill) {
682 ERROR("out of registers of file %u\n", cur->reg.file);
683 abort();
684 }
685 }
686 free.occupy(cur);
687 active.insert(cur);
688 }
689
690 if (f.getMaxAssigned(FILE_GPR) > prog->maxGPR)
691 prog->maxGPR = f.getMaxAssigned(FILE_GPR);
692 if (free.getMaxAssigned(FILE_GPR) > prog->maxGPR)
693 prog->maxGPR = free.getMaxAssigned(FILE_GPR);
694 return true;
695 }
696
697 bool
698 RegAlloc::exec()
699 {
700 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
701 !fi.end(); fi.next()) {
702 func = reinterpret_cast<Function *>(fi.get());
703 if (!execFunc())
704 return false;
705 }
706 return true;
707 }
708
709 bool
710 RegAlloc::execFunc()
711 {
712 InsertConstraintsPass insertConstr;
713 PhiMovesPass insertMoves;
714 BuildIntervalsPass buildIntervals;
715
716 unsigned int i;
717 bool ret;
718
719 ret = insertConstr.exec(func);
720 if (!ret)
721 goto out;
722
723 ret = insertMoves.run(func);
724 if (!ret)
725 goto out;
726
727 for (sequence = func->cfg.nextSequence(), i = 0;
728 ret && i <= func->loopNestingBound;
729 sequence = func->cfg.nextSequence(), ++i)
730 ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
731 if (!ret)
732 goto out;
733
734 func->orderInstructions(this->insns);
735
736 ret = buildIntervals.run(func);
737 if (!ret)
738 goto out;
739
740 ret = coalesceValues(JOIN_MASK_PHI);
741 if (!ret)
742 goto out;
743 switch (prog->getTarget()->getChipset() & 0xf0) {
744 case 0x50:
745 ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_TEX);
746 break;
747 case 0xc0:
748 ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_CONSTRAINT);
749 break;
750 default:
751 break;
752 }
753 if (!ret)
754 goto out;
755 ret = coalesceValues(JOIN_MASK_MOV);
756 if (!ret)
757 goto out;
758
759 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
760 func->print();
761 func->printLiveIntervals();
762 }
763
764 ret = allocateConstrainedValues() && linearScan();
765 if (!ret)
766 goto out;
767
768 out:
769 // TODO: should probably call destructor on LValues later instead
770 for (ArrayList::Iterator it = func->allLValues.iterator();
771 !it.end(); it.next())
772 reinterpret_cast<LValue *>(it.get())->livei.clear();
773
774 return ret;
775 }
776
777 bool Program::registerAllocation()
778 {
779 RegAlloc ra(this);
780 return ra.exec();
781 }
782
783 bool
784 RegAlloc::InsertConstraintsPass::exec(Function *ir)
785 {
786 constrList.clear();
787
788 bool ret = run(ir, true, true);
789 if (ret)
790 ret = insertConstraintMoves();
791 return ret;
792 }
793
794 // TODO: make part of texture insn
795 void
796 RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
797 {
798 Value *def[4];
799 int c, k, d;
800 uint8_t mask = 0;
801
802 for (d = 0, k = 0, c = 0; c < 4; ++c) {
803 if (!(tex->tex.mask & (1 << c)))
804 continue;
805 if (tex->getDef(k)->refCount()) {
806 mask |= 1 << c;
807 def[d++] = tex->getDef(k);
808 }
809 ++k;
810 }
811 tex->tex.mask = mask;
812
813 #if 0 // reorder or set the unused ones NULL ?
814 for (c = 0; c < 4; ++c)
815 if (!(tex->tex.mask & (1 << c)))
816 def[d++] = tex->getDef(c);
817 #endif
818 for (c = 0; c < d; ++c)
819 tex->setDef(c, def[c]);
820 #if 1
821 for (; c < 4; ++c)
822 tex->setDef(c, NULL);
823 #endif
824 }
825
826 bool
827 RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
828 {
829 // current register allocation can't handle it if a value participates in
830 // multiple constraints
831 for (ValueRef::Iterator it = cst->src[s].iterator(); !it.end(); it.next()) {
832 Instruction *insn = it.get()->getInsn();
833 if (insn != cst)
834 return true;
835 }
836
837 // can start at s + 1 because detectConflict is called on all sources
838 for (int c = s + 1; cst->srcExists(c); ++c)
839 if (cst->getSrc(c) == cst->getSrc(s))
840 return true;
841
842 Instruction *defi = cst->getSrc(s)->getInsn();
843
844 return (!defi || defi->constrainedDefs());
845 }
846
847 void
848 RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
849 {
850 Instruction *cst;
851 int d;
852
853 // first, look for an existing identical constraint op
854 for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
855 cst = reinterpret_cast<Instruction *>(it.get());
856 if (!i->bb->dominatedBy(cst->bb))
857 break;
858 for (d = 0; d < n; ++d)
859 if (cst->getSrc(d) != i->getSrc(d + s))
860 break;
861 if (d >= n) {
862 for (d = 0; d < n; ++d, ++s)
863 i->setSrc(s, cst->getDef(d));
864 return;
865 }
866 }
867 cst = new_Instruction(func, OP_CONSTRAINT, i->dType);
868
869 for (d = 0; d < n; ++s, ++d) {
870 cst->setDef(d, new_LValue(func, FILE_GPR));
871 cst->setSrc(d, i->getSrc(s));
872 i->setSrc(s, cst->getDef(d));
873 }
874 i->bb->insertBefore(i, cst);
875
876 constrList.insert(cst);
877 }
878
879 // Add a dummy use of the pointer source of >= 8 byte loads after the load
880 // to prevent it from being assigned a register which overlapping the load's
881 // destination, which would produce random corruptions.
882 void
883 RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
884 {
885 Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
886 hzd->setSrc(0, src->get());
887 i->bb->insertAfter(i, hzd);
888
889 }
890
891 // Insert constraint markers for instructions whose multiple sources must be
892 // located in consecutive registers.
893 bool
894 RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
895 {
896 TexInstruction *tex;
897 Instruction *next;
898 int s, n, size;
899
900 for (Instruction *i = bb->getEntry(); i; i = next) {
901 next = i->next;
902
903 if ((tex = i->asTex())) {
904 textureMask(tex);
905
906 // FIXME: this is target specific
907 if (tex->op == OP_TXQ) {
908 s = tex->srcCount(0xff);
909 n = 0;
910 } else {
911 s = tex->tex.target.getArgCount();
912 if (!tex->tex.target.isArray() &&
913 (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
914 ++s;
915 n = tex->srcCount(0xff) - s;
916 assert(n <= 4);
917 }
918
919 if (s > 1)
920 addConstraint(i, 0, s);
921 if (n > 1)
922 addConstraint(i, s, n);
923 } else
924 if (i->op == OP_EXPORT || i->op == OP_STORE) {
925 for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
926 assert(i->srcExists(s));
927 size -= i->getSrc(s)->reg.size;
928 }
929 if ((s - 1) > 1)
930 addConstraint(i, 1, s - 1);
931 } else
932 if (i->op == OP_LOAD) {
933 if (i->src[0].isIndirect(0) && typeSizeof(i->dType) >= 8)
934 addHazard(i, i->src[0].getIndirect(0));
935 }
936 }
937 return true;
938 }
939
940 // Insert extra moves so that, if multiple register constraints on a value are
941 // in conflict, these conflicts can be resolved.
942 bool
943 RegAlloc::InsertConstraintsPass::insertConstraintMoves()
944 {
945 for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
946 Instruction *cst = reinterpret_cast<Instruction *>(it.get());
947
948 for (int s = 0; cst->srcExists(s); ++s) {
949 if (!detectConflict(cst, s))
950 continue;
951 Instruction *mov = new_Instruction(func, OP_MOV,
952 typeOfSize(cst->src[s].getSize()));
953 mov->setSrc(0, cst->getSrc(s));
954 mov->setDef(0, new_LValue(func, FILE_GPR));
955 cst->setSrc(s, mov->getDef(0));
956
957 cst->bb->insertBefore(cst, mov);
958 }
959 }
960 return true;
961 }
962
963 } // namespace nv50_ir