nv50/ir: make Instruction::src/def container private
[mesa.git] / src / gallium / drivers / nv50 / codegen / nv50_ir_ra.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir.h"
24 #include "nv50_ir_target.h"
25
26 #include "nv50/nv50_debug.h"
27
28 namespace nv50_ir {
29
30 #define MAX_REGISTER_FILE_SIZE 256
31
32 class RegisterSet
33 {
34 public:
35 RegisterSet();
36 RegisterSet(const Target *);
37
38 void init(const Target *);
39 void reset(); // reset allocation status, but not max assigned regs
40
41 void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
42 void intersect(DataFile f, const RegisterSet *);
43
44 bool assign(Value **, int nr);
45 void release(const Value *);
46 void occupy(const Value *);
47
48 int getMaxAssigned(DataFile f) const { return fill[f]; }
49
50 void print() const;
51
52 private:
53 uint32_t bits[FILE_ADDRESS + 1][(MAX_REGISTER_FILE_SIZE + 31) / 32];
54
55 int unit[FILE_ADDRESS + 1]; // log2 of allocation granularity
56
57 int last[FILE_ADDRESS + 1];
58 int fill[FILE_ADDRESS + 1];
59 };
60
61 void
62 RegisterSet::reset()
63 {
64 memset(bits, 0, sizeof(bits));
65 }
66
67 RegisterSet::RegisterSet()
68 {
69 reset();
70 }
71
72 void
73 RegisterSet::init(const Target *targ)
74 {
75 for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
76 DataFile f = static_cast<DataFile>(rf);
77 last[rf] = targ->getFileSize(f) - 1;
78 unit[rf] = targ->getFileUnit(f);
79 fill[rf] = -1;
80 assert(last[rf] < MAX_REGISTER_FILE_SIZE);
81 }
82 }
83
84 RegisterSet::RegisterSet(const Target *targ)
85 {
86 reset();
87 init(targ);
88 }
89
90 void
91 RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
92 {
93 for (int i = 0; i < (last[f] + 31) / 32; ++i)
94 bits[f][i] = (bits[f][i] | lock) & ~unlock;
95 }
96
97 void
98 RegisterSet::intersect(DataFile f, const RegisterSet *set)
99 {
100 for (int i = 0; i < (last[f] + 31) / 32; ++i)
101 bits[f][i] |= set->bits[f][i];
102 }
103
104 void
105 RegisterSet::print() const
106 {
107 INFO("GPR:");
108 for (int i = 0; i < (last[FILE_GPR] + 31) / 32; ++i)
109 INFO(" %08x", bits[FILE_GPR][i]);
110 INFO("\n");
111 }
112
113 bool
114 RegisterSet::assign(Value **def, int nr)
115 {
116 DataFile f = def[0]->reg.file;
117 int n = nr;
118 if (n == 3)
119 n = 4;
120 int s = (n * def[0]->reg.size) >> unit[f];
121 uint32_t m = (1 << s) - 1;
122
123 int id = last[f] + 1;
124 int i;
125
126 for (i = 0; (i * 32) < last[f]; ++i) {
127 if (bits[f][i] == 0xffffffff)
128 continue;
129
130 for (id = 0; id < 32; id += s)
131 if (!(bits[f][i] & (m << id)))
132 break;
133 if (id < 32)
134 break;
135 }
136 id += i * 32;
137 if (id > last[f])
138 return false;
139
140 bits[f][id / 32] |= m << (id % 32);
141
142 if (id + (s - 1) > fill[f])
143 fill[f] = id + (s - 1);
144
145 for (i = 0; i < nr; ++i, ++id)
146 if (!def[i]->livei.isEmpty()) // XXX: really increased id if empty ?
147 def[i]->reg.data.id = id;
148 return true;
149 }
150
151 void
152 RegisterSet::occupy(const Value *val)
153 {
154 int id = val->reg.data.id;
155 if (id < 0)
156 return;
157 unsigned int f = val->reg.file;
158
159 uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
160
161 INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %x\n", f, id, m);
162
163 bits[f][id / 32] |= m << (id % 32);
164
165 if (fill[f] < id)
166 fill[f] = id;
167 }
168
169 void
170 RegisterSet::release(const Value *val)
171 {
172 int id = val->reg.data.id;
173 if (id < 0)
174 return;
175 unsigned int f = val->reg.file;
176
177 uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
178
179 INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %x\n", f, id, m);
180
181 bits[f][id / 32] &= ~(m << (id % 32));
182 }
183
184 #define JOIN_MASK_PHI (1 << 0)
185 #define JOIN_MASK_UNION (1 << 1)
186 #define JOIN_MASK_MOV (1 << 2)
187 #define JOIN_MASK_TEX (1 << 3)
188 #define JOIN_MASK_CONSTRAINT (1 << 4)
189
190 class RegAlloc
191 {
192 public:
193 RegAlloc(Program *program) : prog(program), sequence(0) { }
194
195 bool exec();
196 bool execFunc();
197
198 private:
199 bool coalesceValues(unsigned int mask);
200 bool linearScan();
201 bool allocateConstrainedValues();
202
203 private:
204 class PhiMovesPass : public Pass {
205 private:
206 virtual bool visit(BasicBlock *);
207 inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
208 };
209
210 class BuildIntervalsPass : public Pass {
211 private:
212 virtual bool visit(BasicBlock *);
213 void collectLiveValues(BasicBlock *);
214 void addLiveRange(Value *, const BasicBlock *, int end);
215 };
216
217 class InsertConstraintsPass : public Pass {
218 public:
219 bool exec(Function *func);
220 private:
221 virtual bool visit(BasicBlock *);
222
223 bool insertConstraintMoves();
224
225 void addHazard(Instruction *i, const ValueRef *src);
226 void textureMask(TexInstruction *);
227 void addConstraint(Instruction *, int s, int n);
228 bool detectConflict(Instruction *, int s);
229
230 DLList constrList;
231 };
232
233 bool buildLiveSets(BasicBlock *);
234 void collectLValues(DLList&, bool assignedOnly);
235
236 void insertOrderedTail(DLList&, Value *);
237 inline Instruction *insnBySerial(int);
238
239 private:
240 Program *prog;
241 Function *func;
242
243 // instructions in control flow / chronological order
244 ArrayList insns;
245
246 int sequence; // for manual passes through CFG
247 };
248
249 Instruction *
250 RegAlloc::insnBySerial(int serial)
251 {
252 return reinterpret_cast<Instruction *>(insns.get(serial));
253 }
254
255 void
256 RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
257 const BasicBlock *bb,
258 int end)
259 {
260 Instruction *insn = val->getUniqueInsn();
261
262 if (!insn)
263 return;
264 assert(bb->getFirst()->serial <= bb->getExit()->serial);
265 assert(bb->getExit()->serial + 1 >= end);
266
267 int begin = insn->serial;
268 if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
269 begin = bb->getEntry()->serial;
270
271 INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
272 val->id, begin, insn->serial, end);
273
274 if (begin != end) // empty ranges are only added as hazards for fixed regs
275 val->livei.extend(begin, end);
276 }
277
278 bool
279 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
280 {
281 if (b->cfg.incidentCount() <= 1)
282 return false;
283
284 int n = 0;
285 for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
286 if (ei.getType() == Graph::Edge::TREE ||
287 ei.getType() == Graph::Edge::FORWARD)
288 ++n;
289 return (n == 2);
290 }
291
292 // For each operand of each PHI in b, generate a new value by inserting a MOV
293 // at the end of the block it is coming from and replace the operand with its
294 // result. This eliminates liveness conflicts and enables us to let values be
295 // copied to the right register if such a conflict exists nonetheless.
296 //
297 // These MOVs are also crucial in making sure the live intervals of phi srces
298 // are extended until the end of the loop, since they are not included in the
299 // live-in sets.
300 bool
301 RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
302 {
303 Instruction *phi, *mov;
304 BasicBlock *pb, *pn;
305
306 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
307 pb = pn = BasicBlock::get(ei.getNode());
308 assert(pb);
309
310 if (needNewElseBlock(bb, pb)) {
311 pn = new BasicBlock(func);
312
313 // deletes an edge, iterator is invalid after this:
314 pb->cfg.detach(&bb->cfg);
315 pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
316 pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); // XXX: check order !
317
318 assert(pb->getExit()->op != OP_CALL);
319 if (pb->getExit()->asFlow()->target.bb == bb)
320 pb->getExit()->asFlow()->target.bb = pn;
321 break;
322 }
323 }
324
325 // insert MOVs (phi->src(j) should stem from j-th in-BB)
326 int j = 0;
327 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
328 pb = BasicBlock::get(ei.getNode());
329 if (!pb->isTerminated())
330 pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
331
332 for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
333 mov = new_Instruction(func, OP_MOV, TYPE_U32);
334
335 mov->setSrc(0, phi->getSrc(j));
336 mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
337 phi->setSrc(j, mov->getDef(0));
338
339 pb->insertBefore(pb->getExit(), mov);
340 }
341 ++j;
342 }
343
344 return true;
345 }
346
347 // Build the set of live-in variables of bb.
348 bool
349 RegAlloc::buildLiveSets(BasicBlock *bb)
350 {
351 BasicBlock *bn;
352 Instruction *i;
353 unsigned int s, d;
354
355 INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());
356
357 bb->liveSet.allocate(func->allLValues.getSize(), false);
358
359 int n = 0;
360 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
361 bn = BasicBlock::get(ei.getNode());
362 if (bn == bb)
363 continue;
364 if (bn->cfg.visit(sequence))
365 if (!buildLiveSets(bn))
366 return false;
367 if (n++ == 0)
368 bb->liveSet = bn->liveSet;
369 else
370 bb->liveSet |= bn->liveSet;
371 }
372 if (!n && !bb->liveSet.marker)
373 bb->liveSet.fill(0);
374 bb->liveSet.marker = true;
375
376 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
377 INFO("BB:%i live set of out blocks:\n", bb->getId());
378 bb->liveSet.print();
379 }
380
381 // if (!bb->getEntry())
382 // return true;
383
384 for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
385 for (d = 0; i->defExists(d); ++d)
386 bb->liveSet.clr(i->getDef(d)->id);
387 for (s = 0; i->srcExists(s); ++s)
388 if (i->getSrc(s)->asLValue())
389 bb->liveSet.set(i->getSrc(s)->id);
390 }
391 for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
392 bb->liveSet.clr(i->getDef(0)->id);
393
394 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
395 INFO("BB:%i live set after propagation:\n", bb->getId());
396 bb->liveSet.print();
397 }
398
399 return true;
400 }
401
402 void
403 RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
404 {
405 BasicBlock *bbA = NULL, *bbB = NULL;
406
407 assert(bb->cfg.incidentCount() || bb->liveSet.popCount() == 0);
408
409 if (bb->cfg.outgoingCount()) {
410 // trickery to save a loop of OR'ing liveSets
411 // aliasing works fine with BitSet::setOr
412 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
413 if (ei.getType() == Graph::Edge::DUMMY)
414 continue;
415 if (bbA) {
416 bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
417 bbA = bb;
418 } else {
419 bbA = bbB;
420 }
421 bbB = BasicBlock::get(ei.getNode());
422 }
423 bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
424 } else
425 if (bb->cfg.incidentCount()) {
426 bb->liveSet.fill(0);
427 }
428 }
429
430 bool
431 RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
432 {
433 collectLiveValues(bb);
434
435 INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());
436
437 // go through out blocks and delete phi sources that do not originate from
438 // the current block from the live set
439 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
440 BasicBlock *out = BasicBlock::get(ei.getNode());
441
442 for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
443 bb->liveSet.clr(i->getDef(0)->id);
444
445 for (int s = 0; i->srcExists(s); ++s) {
446 assert(i->src(s).getInsn());
447 if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
448 bb->liveSet.set(i->getSrc(s)->id);
449 else
450 bb->liveSet.clr(i->getSrc(s)->id);
451 }
452 }
453 }
454
455 // remaining live-outs are live until end
456 if (bb->getExit()) {
457 for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
458 if (bb->liveSet.test(j))
459 addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
460 }
461
462 for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
463 for (int d = 0; i->defExists(d); ++d) {
464 bb->liveSet.clr(i->getDef(d)->id);
465 if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
466 i->getDef(d)->livei.extend(i->serial, i->serial);
467 }
468
469 for (int s = 0; i->srcExists(s); ++s) {
470 if (!i->getSrc(s)->asLValue())
471 continue;
472 if (!bb->liveSet.test(i->getSrc(s)->id)) {
473 bb->liveSet.set(i->getSrc(s)->id);
474 addLiveRange(i->getSrc(s), bb, i->serial);
475 }
476 }
477 }
478
479 return true;
480 }
481
482 bool
483 RegAlloc::coalesceValues(unsigned int mask)
484 {
485 int c, n;
486
487 for (n = 0; n < insns.getSize(); ++n) {
488 Instruction *i;
489 Instruction *insn = insnBySerial(n);
490
491 switch (insn->op) {
492 case OP_PHI:
493 if (!(mask & JOIN_MASK_PHI))
494 break;
495 for (c = 0; insn->srcExists(c); ++c)
496 if (!insn->getDef(0)->coalesce(insn->getSrc(c), false)) {
497 ERROR("failed to coalesce phi operands\n");
498 return false;
499 }
500 break;
501 case OP_UNION:
502 if (!(mask & JOIN_MASK_UNION))
503 break;
504 for (c = 0; insn->srcExists(c); ++c)
505 insn->getDef(0)->coalesce(insn->getSrc(c), true);
506 break;
507 case OP_CONSTRAINT:
508 if (!(mask & JOIN_MASK_CONSTRAINT))
509 break;
510 for (c = 0; c < 4 && insn->srcExists(c); ++c)
511 insn->getDef(c)->coalesce(insn->getSrc(c), true);
512 break;
513 case OP_MOV:
514 if (!(mask & JOIN_MASK_MOV))
515 break;
516 i = NULL;
517 if (!insn->getDef(0)->uses.empty())
518 i = insn->getDef(0)->uses.front()->getInsn();
519 // if this is a contraint-move there will only be a single use
520 if (i && i->op == OP_CONSTRAINT)
521 break;
522 i = insn->getSrc(0)->getUniqueInsn();
523 if (i && !i->constrainedDefs())
524 insn->getDef(0)->coalesce(insn->getSrc(0), false);
525 break;
526 case OP_TEX:
527 case OP_TXB:
528 case OP_TXL:
529 case OP_TXF:
530 case OP_TXQ:
531 case OP_TXD:
532 case OP_TXG:
533 case OP_TEXCSAA:
534 if (!(mask & JOIN_MASK_TEX))
535 break;
536 for (c = 0; c < 4 && insn->srcExists(c); ++c)
537 insn->getDef(c)->coalesce(insn->getSrc(c), true);
538 break;
539 default:
540 break;
541 }
542 }
543 return true;
544 }
545
546 void
547 RegAlloc::insertOrderedTail(DLList &list, Value *val)
548 {
549 // we insert the live intervals in order, so this should be short
550 DLList::Iterator iter = list.revIterator();
551 const int begin = val->livei.begin();
552 for (; !iter.end(); iter.next()) {
553 if (reinterpret_cast<Value *>(iter.get())->livei.begin() <= begin)
554 break;
555 }
556 iter.insert(val);
557 }
558
559 static void
560 checkList(DLList &list)
561 {
562 Value *prev = NULL;
563 Value *next = NULL;
564
565 for (DLList::Iterator iter = list.iterator(); !iter.end(); iter.next()) {
566 next = Value::get(iter);
567 assert(next);
568 if (prev) {
569 assert(prev->livei.begin() <= next->livei.begin());
570 }
571 assert(next->join == next);
572 prev = next;
573 }
574 }
575
576 void
577 RegAlloc::collectLValues(DLList &list, bool assignedOnly)
578 {
579 for (int n = 0; n < insns.getSize(); ++n) {
580 Instruction *i = insnBySerial(n);
581
582 for (int d = 0; i->defExists(d); ++d)
583 if (!i->getDef(d)->livei.isEmpty())
584 if (!assignedOnly || i->getDef(d)->reg.data.id >= 0)
585 insertOrderedTail(list, i->getDef(d));
586 }
587 checkList(list);
588 }
589
590 bool
591 RegAlloc::allocateConstrainedValues()
592 {
593 Value *defs[4];
594 RegisterSet regSet[4];
595 DLList regVals;
596
597 INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: allocating constrained values\n");
598
599 collectLValues(regVals, true);
600
601 for (int c = 0; c < 4; ++c)
602 regSet[c].init(prog->getTarget());
603
604 for (int n = 0; n < insns.getSize(); ++n) {
605 Instruction *i = insnBySerial(n);
606
607 const int vecSize = i->defCount(0xf);
608 if (vecSize < 2)
609 continue;
610 assert(vecSize <= 4);
611
612 for (int c = 0; c < vecSize; ++c)
613 defs[c] = i->def(c).rep();
614
615 if (defs[0]->reg.data.id >= 0) {
616 for (int c = 1; c < vecSize; ++c) {
617 assert(defs[c]->reg.data.id >= 0);
618 }
619 continue;
620 }
621
622 for (int c = 0; c < vecSize; ++c) {
623 uint32_t mask;
624 regSet[c].reset();
625
626 for (DLList::Iterator it = regVals.iterator(); !it.end(); it.next()) {
627 Value *rVal = Value::get(it);
628 if (rVal->reg.data.id >= 0 && rVal->livei.overlaps(defs[c]->livei))
629 regSet[c].occupy(rVal);
630 }
631 mask = 0x11111111;
632 if (vecSize == 2) // granularity is 2 instead of 4
633 mask |= 0x11111111 << 2;
634 regSet[c].periodicMask(defs[0]->reg.file, 0, ~(mask << c));
635
636 if (!defs[c]->livei.isEmpty())
637 insertOrderedTail(regVals, defs[c]);
638 }
639 for (int c = 1; c < vecSize; ++c)
640 regSet[0].intersect(defs[0]->reg.file, &regSet[c]);
641
642 if (!regSet[0].assign(&defs[0], vecSize)) // TODO: spilling
643 return false;
644 }
645 for (int c = 0; c < 4; c += 2)
646 if (regSet[c].getMaxAssigned(FILE_GPR) > prog->maxGPR)
647 prog->maxGPR = regSet[c].getMaxAssigned(FILE_GPR);
648 return true;
649 }
650
651 bool
652 RegAlloc::linearScan()
653 {
654 Value *cur, *val;
655 DLList unhandled, active, inactive;
656 RegisterSet f(prog->getTarget()), free(prog->getTarget());
657
658 INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: linear scan\n");
659
660 collectLValues(unhandled, false);
661
662 for (DLList::Iterator cI = unhandled.iterator(); !cI.end();) {
663 cur = Value::get(cI);
664 cI.erase();
665
666 for (DLList::Iterator aI = active.iterator(); !aI.end();) {
667 val = Value::get(aI);
668 if (val->livei.end() <= cur->livei.begin()) {
669 free.release(val);
670 aI.erase();
671 } else
672 if (!val->livei.contains(cur->livei.begin())) {
673 free.release(val);
674 aI.moveToList(inactive);
675 } else {
676 aI.next();
677 }
678 }
679
680 for (DLList::Iterator iI = inactive.iterator(); !iI.end();) {
681 val = Value::get(iI);
682 if (val->livei.end() <= cur->livei.begin()) {
683 iI.erase();
684 } else
685 if (val->livei.contains(cur->livei.begin())) {
686 free.occupy(val);
687 iI.moveToList(active);
688 } else {
689 iI.next();
690 }
691 }
692 f = free;
693
694 for (DLList::Iterator iI = inactive.iterator(); !iI.end(); iI.next()) {
695 val = Value::get(iI);
696 if (val->livei.overlaps(cur->livei))
697 f.occupy(val);
698 }
699
700 for (DLList::Iterator uI = unhandled.iterator(); !uI.end(); uI.next()) {
701 val = Value::get(uI);
702 if (val->reg.data.id >= 0 && val->livei.overlaps(cur->livei))
703 f.occupy(val);
704 }
705
706 if (cur->reg.data.id < 0) {
707 bool spill = !f.assign(&cur, 1);
708 if (spill) {
709 ERROR("out of registers of file %u\n", cur->reg.file);
710 abort();
711 }
712 }
713 free.occupy(cur);
714 active.insert(cur);
715 }
716
717 if (f.getMaxAssigned(FILE_GPR) > prog->maxGPR)
718 prog->maxGPR = f.getMaxAssigned(FILE_GPR);
719 if (free.getMaxAssigned(FILE_GPR) > prog->maxGPR)
720 prog->maxGPR = free.getMaxAssigned(FILE_GPR);
721 return true;
722 }
723
724 bool
725 RegAlloc::exec()
726 {
727 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
728 !fi.end(); fi.next()) {
729 func = reinterpret_cast<Function *>(fi.get());
730 if (!execFunc())
731 return false;
732 }
733 return true;
734 }
735
736 bool
737 RegAlloc::execFunc()
738 {
739 InsertConstraintsPass insertConstr;
740 PhiMovesPass insertMoves;
741 BuildIntervalsPass buildIntervals;
742
743 unsigned int i;
744 bool ret;
745
746 ret = insertConstr.exec(func);
747 if (!ret)
748 goto out;
749
750 ret = insertMoves.run(func);
751 if (!ret)
752 goto out;
753
754 for (sequence = func->cfg.nextSequence(), i = 0;
755 ret && i <= func->loopNestingBound;
756 sequence = func->cfg.nextSequence(), ++i)
757 ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
758 if (!ret)
759 goto out;
760
761 func->orderInstructions(this->insns);
762
763 ret = buildIntervals.run(func);
764 if (!ret)
765 goto out;
766
767 ret = coalesceValues(JOIN_MASK_PHI);
768 if (!ret)
769 goto out;
770 switch (prog->getTarget()->getChipset() & 0xf0) {
771 case 0x50:
772 ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_TEX);
773 break;
774 case 0xc0:
775 ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_CONSTRAINT);
776 break;
777 default:
778 break;
779 }
780 if (!ret)
781 goto out;
782 ret = coalesceValues(JOIN_MASK_MOV);
783 if (!ret)
784 goto out;
785
786 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
787 func->print();
788 func->printLiveIntervals();
789 }
790
791 ret = allocateConstrainedValues() && linearScan();
792 if (!ret)
793 goto out;
794
795 out:
796 // TODO: should probably call destructor on LValues later instead
797 for (ArrayList::Iterator it = func->allLValues.iterator();
798 !it.end(); it.next())
799 reinterpret_cast<LValue *>(it.get())->livei.clear();
800
801 return ret;
802 }
803
804 bool Program::registerAllocation()
805 {
806 RegAlloc ra(this);
807 return ra.exec();
808 }
809
810 bool
811 RegAlloc::InsertConstraintsPass::exec(Function *ir)
812 {
813 constrList.clear();
814
815 bool ret = run(ir, true, true);
816 if (ret)
817 ret = insertConstraintMoves();
818 return ret;
819 }
820
821 // TODO: make part of texture insn
822 void
823 RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
824 {
825 Value *def[4];
826 int c, k, d;
827 uint8_t mask = 0;
828
829 for (d = 0, k = 0, c = 0; c < 4; ++c) {
830 if (!(tex->tex.mask & (1 << c)))
831 continue;
832 if (tex->getDef(k)->refCount()) {
833 mask |= 1 << c;
834 def[d++] = tex->getDef(k);
835 }
836 ++k;
837 }
838 tex->tex.mask = mask;
839
840 #if 0 // reorder or set the unused ones NULL ?
841 for (c = 0; c < 4; ++c)
842 if (!(tex->tex.mask & (1 << c)))
843 def[d++] = tex->getDef(c);
844 #endif
845 for (c = 0; c < d; ++c)
846 tex->setDef(c, def[c]);
847 #if 1
848 for (; c < 4; ++c)
849 tex->setDef(c, NULL);
850 #endif
851 }
852
853 bool
854 RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
855 {
856 Value *v = cst->getSrc(s);
857
858 // current register allocation can't handle it if a value participates in
859 // multiple constraints
860 for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) {
861 if (cst != (*it)->getInsn())
862 return true;
863 }
864
865 // can start at s + 1 because detectConflict is called on all sources
866 for (int c = s + 1; cst->srcExists(c); ++c)
867 if (v == cst->getSrc(c))
868 return true;
869
870 Instruction *defi = v->getInsn();
871
872 return (!defi || defi->constrainedDefs());
873 }
874
875 void
876 RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
877 {
878 Instruction *cst;
879 int d;
880
881 // first, look for an existing identical constraint op
882 for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
883 cst = reinterpret_cast<Instruction *>(it.get());
884 if (!i->bb->dominatedBy(cst->bb))
885 break;
886 for (d = 0; d < n; ++d)
887 if (cst->getSrc(d) != i->getSrc(d + s))
888 break;
889 if (d >= n) {
890 for (d = 0; d < n; ++d, ++s)
891 i->setSrc(s, cst->getDef(d));
892 return;
893 }
894 }
895 cst = new_Instruction(func, OP_CONSTRAINT, i->dType);
896
897 for (d = 0; d < n; ++s, ++d) {
898 cst->setDef(d, new_LValue(func, FILE_GPR));
899 cst->setSrc(d, i->getSrc(s));
900 i->setSrc(s, cst->getDef(d));
901 }
902 i->bb->insertBefore(i, cst);
903
904 constrList.insert(cst);
905 }
906
907 // Add a dummy use of the pointer source of >= 8 byte loads after the load
908 // to prevent it from being assigned a register which overlapping the load's
909 // destination, which would produce random corruptions.
910 void
911 RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
912 {
913 Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
914 hzd->setSrc(0, src->get());
915 i->bb->insertAfter(i, hzd);
916
917 }
918
919 // Insert constraint markers for instructions whose multiple sources must be
920 // located in consecutive registers.
921 bool
922 RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
923 {
924 TexInstruction *tex;
925 Instruction *next;
926 int s, n, size;
927
928 for (Instruction *i = bb->getEntry(); i; i = next) {
929 next = i->next;
930
931 if ((tex = i->asTex())) {
932 textureMask(tex);
933
934 // FIXME: this is target specific
935 if (tex->op == OP_TXQ) {
936 s = tex->srcCount(0xff);
937 n = 0;
938 } else {
939 s = tex->tex.target.getArgCount();
940 if (!tex->tex.target.isArray() &&
941 (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
942 ++s;
943 if (tex->op == OP_TXD && tex->tex.useOffsets)
944 ++s;
945 n = tex->srcCount(0xff) - s;
946 assert(n <= 4);
947 }
948
949 if (s > 1)
950 addConstraint(i, 0, s);
951 if (n > 1)
952 addConstraint(i, s, n);
953 } else
954 if (i->op == OP_EXPORT || i->op == OP_STORE) {
955 for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
956 assert(i->srcExists(s));
957 size -= i->getSrc(s)->reg.size;
958 }
959 if ((s - 1) > 1)
960 addConstraint(i, 1, s - 1);
961 } else
962 if (i->op == OP_LOAD) {
963 if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
964 addHazard(i, i->src(0).getIndirect(0));
965 }
966 }
967 return true;
968 }
969
970 // Insert extra moves so that, if multiple register constraints on a value are
971 // in conflict, these conflicts can be resolved.
972 bool
973 RegAlloc::InsertConstraintsPass::insertConstraintMoves()
974 {
975 for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
976 Instruction *cst = reinterpret_cast<Instruction *>(it.get());
977
978 for (int s = 0; cst->srcExists(s); ++s) {
979 if (!detectConflict(cst, s))
980 continue;
981 Instruction *mov = new_Instruction(func, OP_MOV,
982 typeOfSize(cst->src(s).getSize()));
983 mov->setSrc(0, cst->getSrc(s));
984 mov->setDef(0, new_LValue(func, FILE_GPR));
985 cst->setSrc(s, mov->getDef(0));
986
987 cst->bb->insertBefore(cst, mov);
988 }
989 }
990 return true;
991 }
992
993 } // namespace nv50_ir