nv50/ir: add missing license headers
[mesa.git] / src / gallium / drivers / nv50 / codegen / nv50_ir_ra.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir.h"
24 #include "nv50_ir_target.h"
25
26 #include "nv50/nv50_debug.h"
27
28 namespace nv50_ir {
29
30 #define MAX_REGISTER_FILE_SIZE 256
31
32 class RegisterSet
33 {
34 public:
35 RegisterSet();
36 RegisterSet(const Target *);
37
38 void init(const Target *);
39 void reset(); // reset allocation status, but not max assigned regs
40
41 void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
42 void intersect(DataFile f, const RegisterSet *);
43
44 bool assign(Value **, int nr);
45 void release(const Value *);
46 void occupy(const Value *);
47
48 int getMaxAssigned(DataFile f) const { return fill[f]; }
49
50 void print() const;
51
52 private:
53 uint32_t bits[FILE_ADDRESS + 1][(MAX_REGISTER_FILE_SIZE + 31) / 32];
54
55 int unit[FILE_ADDRESS + 1]; // log2 of allocation granularity
56
57 int last[FILE_ADDRESS + 1];
58 int fill[FILE_ADDRESS + 1];
59 };
60
61 void
62 RegisterSet::reset()
63 {
64 memset(bits, 0, sizeof(bits));
65 }
66
67 RegisterSet::RegisterSet()
68 {
69 reset();
70 }
71
72 void
73 RegisterSet::init(const Target *targ)
74 {
75 for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
76 DataFile f = static_cast<DataFile>(rf);
77 last[rf] = targ->getFileSize(f) - 1;
78 unit[rf] = targ->getFileUnit(f);
79 fill[rf] = -1;
80 assert(last[rf] < MAX_REGISTER_FILE_SIZE);
81 }
82 }
83
84 RegisterSet::RegisterSet(const Target *targ)
85 {
86 reset();
87 init(targ);
88 }
89
90 void
91 RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
92 {
93 for (int i = 0; i < (last[f] + 31) / 32; ++i)
94 bits[f][i] = (bits[f][i] | lock) & ~unlock;
95 }
96
97 void
98 RegisterSet::intersect(DataFile f, const RegisterSet *set)
99 {
100 for (int i = 0; i < (last[f] + 31) / 32; ++i)
101 bits[f][i] |= set->bits[f][i];
102 }
103
104 void
105 RegisterSet::print() const
106 {
107 INFO("GPR:");
108 for (int i = 0; i < (last[FILE_GPR] + 31) / 32; ++i)
109 INFO(" %08x", bits[FILE_GPR][i]);
110 INFO("\n");
111 }
112
113 bool
114 RegisterSet::assign(Value **def, int nr)
115 {
116 DataFile f = def[0]->reg.file;
117 int n = nr;
118 if (n == 3)
119 n = 4;
120 int s = (n * def[0]->reg.size) >> unit[f];
121 uint32_t m = (1 << s) - 1;
122
123 int id = last[f] + 1;
124 int i;
125
126 for (i = 0; (i * 32) < last[f]; ++i) {
127 if (bits[f][i] == 0xffffffff)
128 continue;
129
130 for (id = 0; id < 32; id += s)
131 if (!(bits[f][i] & (m << id)))
132 break;
133 if (id < 32)
134 break;
135 }
136 id += i * 32;
137 if (id > last[f])
138 return false;
139
140 bits[f][id / 32] |= m << (id % 32);
141
142 if (id + (s - 1) > fill[f])
143 fill[f] = id + (s - 1);
144
145 for (i = 0; i < nr; ++i, ++id)
146 if (!def[i]->livei.isEmpty()) // XXX: really increased id if empty ?
147 def[i]->reg.data.id = id;
148 return true;
149 }
150
151 void
152 RegisterSet::occupy(const Value *val)
153 {
154 int id = val->reg.data.id;
155 if (id < 0)
156 return;
157 unsigned int f = val->reg.file;
158
159 uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
160
161 INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %x\n", f, id, m);
162
163 bits[f][id / 32] |= m << (id % 32);
164
165 if (fill[f] < id)
166 fill[f] = id;
167 }
168
169 void
170 RegisterSet::release(const Value *val)
171 {
172 int id = val->reg.data.id;
173 if (id < 0)
174 return;
175 unsigned int f = val->reg.file;
176
177 uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
178
179 INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %x\n", f, id, m);
180
181 bits[f][id / 32] &= ~(m << (id % 32));
182 }
183
184 #define JOIN_MASK_PHI (1 << 0)
185 #define JOIN_MASK_UNION (1 << 1)
186 #define JOIN_MASK_MOV (1 << 2)
187 #define JOIN_MASK_TEX (1 << 3)
188 #define JOIN_MASK_CONSTRAINT (1 << 4)
189
190 class RegAlloc
191 {
192 public:
193 RegAlloc(Program *program) : prog(program), sequence(0) { }
194
195 bool exec();
196 bool execFunc();
197
198 private:
199 bool coalesceValues(unsigned int mask);
200 bool linearScan();
201 bool allocateConstrainedValues();
202
203 private:
204 class PhiMovesPass : public Pass {
205 private:
206 virtual bool visit(BasicBlock *);
207 inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
208 };
209
210 class BuildIntervalsPass : public Pass {
211 private:
212 virtual bool visit(BasicBlock *);
213 void collectLiveValues(BasicBlock *);
214 void addLiveRange(Value *, const BasicBlock *, int end);
215 };
216
217 class InsertConstraintsPass : public Pass {
218 public:
219 bool exec(Function *func);
220 private:
221 virtual bool visit(BasicBlock *);
222
223 bool insertConstraintMoves();
224
225 void addHazard(Instruction *i, const ValueRef *src);
226 void textureMask(TexInstruction *);
227 void addConstraint(Instruction *, int s, int n);
228 bool detectConflict(Instruction *, int s);
229
230 DLList constrList;
231 };
232
233 bool buildLiveSets(BasicBlock *);
234 void collectLValues(DLList&, bool assignedOnly);
235
236 void insertOrderedTail(DLList&, Value *);
237 inline Instruction *insnBySerial(int);
238
239 private:
240 Program *prog;
241 Function *func;
242
243 // instructions in control flow / chronological order
244 ArrayList insns;
245
246 int sequence; // for manual passes through CFG
247 };
248
249 Instruction *
250 RegAlloc::insnBySerial(int serial)
251 {
252 return reinterpret_cast<Instruction *>(insns.get(serial));
253 }
254
255 void
256 RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
257 const BasicBlock *bb,
258 int end)
259 {
260 Instruction *insn = val->getUniqueInsn();
261
262 if (!insn)
263 return;
264 assert(bb->getFirst()->serial <= bb->getExit()->serial);
265 assert(bb->getExit()->serial + 1 >= end);
266
267 int begin = insn->serial;
268 if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
269 begin = bb->getEntry()->serial;
270
271 INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
272 val->id, begin, insn->serial, end);
273
274 if (begin != end) // empty ranges are only added as hazards for fixed regs
275 val->livei.extend(begin, end);
276 }
277
278 bool
279 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
280 {
281 if (b->cfg.incidentCount() <= 1)
282 return false;
283
284 int n = 0;
285 for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
286 if (ei.getType() == Graph::Edge::TREE ||
287 ei.getType() == Graph::Edge::FORWARD)
288 ++n;
289 return (n == 2);
290 }
291
292 // For each operand of each PHI in b, generate a new value by inserting a MOV
293 // at the end of the block it is coming from and replace the operand with its
294 // result. This eliminates liveness conflicts and enables us to let values be
295 // copied to the right register if such a conflict exists nonetheless.
296 //
297 // These MOVs are also crucial in making sure the live intervals of phi srces
298 // are extended until the end of the loop, since they are not included in the
299 // live-in sets.
300 bool
301 RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
302 {
303 Instruction *phi, *mov;
304 BasicBlock *pb, *pn;
305
306 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
307 pb = pn = BasicBlock::get(ei.getNode());
308 assert(pb);
309
310 if (needNewElseBlock(bb, pb)) {
311 pn = new BasicBlock(func);
312
313 // deletes an edge, iterator is invalid after this:
314 pb->cfg.detach(&bb->cfg);
315 pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
316 pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); // XXX: check order !
317
318 assert(pb->getExit()->op != OP_CALL);
319 if (pb->getExit()->asFlow()->target.bb == bb)
320 pb->getExit()->asFlow()->target.bb = pn;
321 break;
322 }
323 }
324
325 // insert MOVs (phi->src[j] should stem from j-th in-BB)
326 int j = 0;
327 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
328 pb = BasicBlock::get(ei.getNode());
329 if (!pb->isTerminated())
330 pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
331
332 for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
333 mov = new_Instruction(func, OP_MOV, TYPE_U32);
334
335 mov->setSrc(0, phi->getSrc(j));
336 mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
337 phi->setSrc(j, mov->getDef(0));
338
339 pb->insertBefore(pb->getExit(), mov);
340 }
341 ++j;
342 }
343
344 return true;
345 }
346
347 // Build the set of live-in variables of bb.
348 bool
349 RegAlloc::buildLiveSets(BasicBlock *bb)
350 {
351 BasicBlock *bn;
352 Instruction *i;
353 unsigned int s, d;
354
355 INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());
356
357 bb->liveSet.allocate(func->allLValues.getSize(), false);
358
359 int n = 0;
360 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
361 bn = BasicBlock::get(ei.getNode());
362 if (bn == bb)
363 continue;
364 if (bn->cfg.visit(sequence))
365 if (!buildLiveSets(bn))
366 return false;
367 if (n++ == 0)
368 bb->liveSet = bn->liveSet;
369 else
370 bb->liveSet |= bn->liveSet;
371 }
372 if (!n && !bb->liveSet.marker)
373 bb->liveSet.fill(0);
374 bb->liveSet.marker = true;
375
376 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
377 INFO("BB:%i live set of out blocks:\n", bb->getId());
378 bb->liveSet.print();
379 }
380
381 // if (!bb->getEntry())
382 // return true;
383
384 for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
385 for (d = 0; i->defExists(d); ++d)
386 bb->liveSet.clr(i->getDef(d)->id);
387 for (s = 0; i->srcExists(s); ++s)
388 if (i->getSrc(s)->asLValue())
389 bb->liveSet.set(i->getSrc(s)->id);
390 }
391 for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
392 bb->liveSet.clr(i->getDef(0)->id);
393
394 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
395 INFO("BB:%i live set after propagation:\n", bb->getId());
396 bb->liveSet.print();
397 }
398
399 return true;
400 }
401
402 void
403 RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
404 {
405 BasicBlock *bbA = NULL, *bbB = NULL;
406
407 assert(bb->cfg.incidentCount() || bb->liveSet.popCount() == 0);
408
409 if (bb->cfg.outgoingCount()) {
410 // trickery to save a loop of OR'ing liveSets
411 // aliasing works fine with BitSet::setOr
412 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
413 if (ei.getType() == Graph::Edge::DUMMY)
414 continue;
415 if (bbA) {
416 bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
417 bbA = bb;
418 } else {
419 bbA = bbB;
420 }
421 bbB = BasicBlock::get(ei.getNode());
422 }
423 bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
424 } else
425 if (bb->cfg.incidentCount()) {
426 bb->liveSet.fill(0);
427 }
428 }
429
430 bool
431 RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
432 {
433 collectLiveValues(bb);
434
435 INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());
436
437 // go through out blocks and delete phi sources that do not originate from
438 // the current block from the live set
439 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
440 BasicBlock *out = BasicBlock::get(ei.getNode());
441
442 for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
443 bb->liveSet.clr(i->getDef(0)->id);
444
445 for (int s = 0; s < NV50_IR_MAX_SRCS && i->src[s].exists(); ++s) {
446 assert(i->src[s].getInsn());
447 if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
448 bb->liveSet.set(i->getSrc(s)->id);
449 else
450 bb->liveSet.clr(i->getSrc(s)->id);
451 }
452 }
453 }
454
455 // remaining live-outs are live until end
456 if (bb->getExit()) {
457 for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
458 if (bb->liveSet.test(j))
459 addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
460 }
461
462 for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
463 for (int d = 0; i->defExists(d); ++d) {
464 bb->liveSet.clr(i->getDef(d)->id);
465 if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
466 i->getDef(d)->livei.extend(i->serial, i->serial);
467 }
468
469 for (int s = 0; i->srcExists(s); ++s) {
470 if (!i->getSrc(s)->asLValue())
471 continue;
472 if (!bb->liveSet.test(i->getSrc(s)->id)) {
473 bb->liveSet.set(i->getSrc(s)->id);
474 addLiveRange(i->getSrc(s), bb, i->serial);
475 }
476 }
477 }
478
479 return true;
480 }
481
482 bool
483 RegAlloc::coalesceValues(unsigned int mask)
484 {
485 int c, n;
486
487 for (n = 0; n < insns.getSize(); ++n) {
488 Instruction *i;
489 Instruction *insn = insnBySerial(n);
490
491 switch (insn->op) {
492 case OP_PHI:
493 if (!(mask & JOIN_MASK_PHI))
494 break;
495 for (c = 0; insn->srcExists(c); ++c)
496 if (!insn->getDef(0)->coalesce(insn->getSrc(c), false)) {
497 ERROR("failed to coalesce phi operands\n");
498 return false;
499 }
500 break;
501 case OP_UNION:
502 if (!(mask & JOIN_MASK_UNION))
503 break;
504 for (c = 0; insn->srcExists(c); ++c)
505 insn->getDef(0)->coalesce(insn->getSrc(c), true);
506 break;
507 case OP_CONSTRAINT:
508 if (!(mask & JOIN_MASK_CONSTRAINT))
509 break;
510 for (c = 0; c < 4 && insn->srcExists(c); ++c)
511 insn->getDef(c)->coalesce(insn->getSrc(c), true);
512 break;
513 case OP_MOV:
514 if (!(mask & JOIN_MASK_MOV))
515 break;
516 i = insn->getSrc(0)->getUniqueInsn();
517 if (i && !i->constrainedDefs())
518 insn->getDef(0)->coalesce(insn->getSrc(0), false);
519 break;
520 case OP_TEX:
521 case OP_TXB:
522 case OP_TXL:
523 case OP_TXF:
524 case OP_TXQ:
525 case OP_TXD:
526 case OP_TXG:
527 case OP_TEXCSAA:
528 if (!(mask & JOIN_MASK_TEX))
529 break;
530 for (c = 0; c < 4 && insn->srcExists(c); ++c)
531 insn->getDef(c)->coalesce(insn->getSrc(c), true);
532 break;
533 default:
534 break;
535 }
536 }
537 return true;
538 }
539
540 void
541 RegAlloc::insertOrderedTail(DLList &list, Value *val)
542 {
543 // we insert the live intervals in order, so this should be short
544 DLList::Iterator iter = list.revIterator();
545 const int begin = val->livei.begin();
546 for (; !iter.end(); iter.next()) {
547 if (reinterpret_cast<Value *>(iter.get())->livei.begin() <= begin)
548 break;
549 }
550 iter.insert(val);
551 }
552
553 static void
554 checkList(DLList &list)
555 {
556 Value *prev = NULL;
557 Value *next = NULL;
558
559 for (DLList::Iterator iter = list.iterator(); !iter.end(); iter.next()) {
560 next = Value::get(iter);
561 assert(next);
562 if (prev) {
563 assert(prev->livei.begin() <= next->livei.begin());
564 }
565 assert(next->join == next);
566 prev = next;
567 }
568 }
569
570 void
571 RegAlloc::collectLValues(DLList &list, bool assignedOnly)
572 {
573 for (int n = 0; n < insns.getSize(); ++n) {
574 Instruction *i = insnBySerial(n);
575
576 for (int d = 0; i->defExists(d); ++d)
577 if (!i->getDef(d)->livei.isEmpty())
578 if (!assignedOnly || i->getDef(d)->reg.data.id >= 0)
579 insertOrderedTail(list, i->getDef(d));
580 }
581 checkList(list);
582 }
583
584 bool
585 RegAlloc::allocateConstrainedValues()
586 {
587 Value *defs[4];
588 RegisterSet regSet[4];
589 DLList regVals;
590
591 INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: allocating constrained values\n");
592
593 collectLValues(regVals, true);
594
595 for (int c = 0; c < 4; ++c)
596 regSet[c].init(prog->getTarget());
597
598 for (int n = 0; n < insns.getSize(); ++n) {
599 Instruction *i = insnBySerial(n);
600
601 const int vecSize = i->defCount(0xf);
602 if (vecSize < 2)
603 continue;
604 assert(vecSize <= 4);
605
606 for (int c = 0; c < vecSize; ++c)
607 defs[c] = i->def[c].rep();
608
609 if (defs[0]->reg.data.id >= 0) {
610 for (int c = 1; c < vecSize; ++c) {
611 assert(defs[c]->reg.data.id >= 0);
612 }
613 continue;
614 }
615
616 for (int c = 0; c < vecSize; ++c) {
617 uint32_t mask;
618 regSet[c].reset();
619
620 for (DLList::Iterator it = regVals.iterator(); !it.end(); it.next()) {
621 Value *rVal = Value::get(it);
622 if (rVal->reg.data.id >= 0 && rVal->livei.overlaps(defs[c]->livei))
623 regSet[c].occupy(rVal);
624 }
625 mask = 0x11111111;
626 if (vecSize == 2) // granularity is 2 instead of 4
627 mask |= 0x11111111 << 2;
628 regSet[c].periodicMask(defs[0]->reg.file, 0, ~(mask << c));
629
630 if (!defs[c]->livei.isEmpty())
631 insertOrderedTail(regVals, defs[c]);
632 }
633 for (int c = 1; c < vecSize; ++c)
634 regSet[0].intersect(defs[0]->reg.file, &regSet[c]);
635
636 if (!regSet[0].assign(&defs[0], vecSize)) // TODO: spilling
637 return false;
638 }
639 for (int c = 0; c < 4; c += 2)
640 if (regSet[c].getMaxAssigned(FILE_GPR) > prog->maxGPR)
641 prog->maxGPR = regSet[c].getMaxAssigned(FILE_GPR);
642 return true;
643 }
644
645 bool
646 RegAlloc::linearScan()
647 {
648 Value *cur, *val;
649 DLList unhandled, active, inactive;
650 RegisterSet f(prog->getTarget()), free(prog->getTarget());
651
652 INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: linear scan\n");
653
654 collectLValues(unhandled, false);
655
656 for (DLList::Iterator cI = unhandled.iterator(); !cI.end();) {
657 cur = Value::get(cI);
658 cI.erase();
659
660 for (DLList::Iterator aI = active.iterator(); !aI.end();) {
661 val = Value::get(aI);
662 if (val->livei.end() <= cur->livei.begin()) {
663 free.release(val);
664 aI.erase();
665 } else
666 if (!val->livei.contains(cur->livei.begin())) {
667 free.release(val);
668 aI.moveToList(inactive);
669 } else {
670 aI.next();
671 }
672 }
673
674 for (DLList::Iterator iI = inactive.iterator(); !iI.end();) {
675 val = Value::get(iI);
676 if (val->livei.end() <= cur->livei.begin()) {
677 iI.erase();
678 } else
679 if (val->livei.contains(cur->livei.begin())) {
680 free.occupy(val);
681 iI.moveToList(active);
682 } else {
683 iI.next();
684 }
685 }
686 f = free;
687
688 for (DLList::Iterator iI = inactive.iterator(); !iI.end(); iI.next()) {
689 val = Value::get(iI);
690 if (val->livei.overlaps(cur->livei))
691 f.occupy(val);
692 }
693
694 for (DLList::Iterator uI = unhandled.iterator(); !uI.end(); uI.next()) {
695 val = Value::get(uI);
696 if (val->reg.data.id >= 0 && val->livei.overlaps(cur->livei))
697 f.occupy(val);
698 }
699
700 if (cur->reg.data.id < 0) {
701 bool spill = !f.assign(&cur, 1);
702 if (spill) {
703 ERROR("out of registers of file %u\n", cur->reg.file);
704 abort();
705 }
706 }
707 free.occupy(cur);
708 active.insert(cur);
709 }
710
711 if (f.getMaxAssigned(FILE_GPR) > prog->maxGPR)
712 prog->maxGPR = f.getMaxAssigned(FILE_GPR);
713 if (free.getMaxAssigned(FILE_GPR) > prog->maxGPR)
714 prog->maxGPR = free.getMaxAssigned(FILE_GPR);
715 return true;
716 }
717
718 bool
719 RegAlloc::exec()
720 {
721 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
722 !fi.end(); fi.next()) {
723 func = reinterpret_cast<Function *>(fi.get());
724 if (!execFunc())
725 return false;
726 }
727 return true;
728 }
729
730 bool
731 RegAlloc::execFunc()
732 {
733 InsertConstraintsPass insertConstr;
734 PhiMovesPass insertMoves;
735 BuildIntervalsPass buildIntervals;
736
737 unsigned int i;
738 bool ret;
739
740 ret = insertConstr.exec(func);
741 if (!ret)
742 goto out;
743
744 ret = insertMoves.run(func);
745 if (!ret)
746 goto out;
747
748 for (sequence = func->cfg.nextSequence(), i = 0;
749 ret && i <= func->loopNestingBound;
750 sequence = func->cfg.nextSequence(), ++i)
751 ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
752 if (!ret)
753 goto out;
754
755 func->orderInstructions(this->insns);
756
757 ret = buildIntervals.run(func);
758 if (!ret)
759 goto out;
760
761 ret = coalesceValues(JOIN_MASK_PHI);
762 if (!ret)
763 goto out;
764 switch (prog->getTarget()->getChipset() & 0xf0) {
765 case 0x50:
766 ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_TEX);
767 break;
768 case 0xc0:
769 ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_CONSTRAINT);
770 break;
771 default:
772 break;
773 }
774 if (!ret)
775 goto out;
776 ret = coalesceValues(JOIN_MASK_MOV);
777 if (!ret)
778 goto out;
779
780 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
781 func->print();
782 func->printLiveIntervals();
783 }
784
785 ret = allocateConstrainedValues() && linearScan();
786 if (!ret)
787 goto out;
788
789 out:
790 // TODO: should probably call destructor on LValues later instead
791 for (ArrayList::Iterator it = func->allLValues.iterator();
792 !it.end(); it.next())
793 reinterpret_cast<LValue *>(it.get())->livei.clear();
794
795 return ret;
796 }
797
798 bool Program::registerAllocation()
799 {
800 RegAlloc ra(this);
801 return ra.exec();
802 }
803
804 bool
805 RegAlloc::InsertConstraintsPass::exec(Function *ir)
806 {
807 constrList.clear();
808
809 bool ret = run(ir, true, true);
810 if (ret)
811 ret = insertConstraintMoves();
812 return ret;
813 }
814
815 // TODO: make part of texture insn
816 void
817 RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
818 {
819 Value *def[4];
820 int c, k, d;
821 uint8_t mask = 0;
822
823 for (d = 0, k = 0, c = 0; c < 4; ++c) {
824 if (!(tex->tex.mask & (1 << c)))
825 continue;
826 if (tex->getDef(k)->refCount()) {
827 mask |= 1 << c;
828 def[d++] = tex->getDef(k);
829 }
830 ++k;
831 }
832 tex->tex.mask = mask;
833
834 #if 0 // reorder or set the unused ones NULL ?
835 for (c = 0; c < 4; ++c)
836 if (!(tex->tex.mask & (1 << c)))
837 def[d++] = tex->getDef(c);
838 #endif
839 for (c = 0; c < d; ++c)
840 tex->setDef(c, def[c]);
841 #if 1
842 for (; c < 4; ++c)
843 tex->setDef(c, NULL);
844 #endif
845 }
846
847 bool
848 RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
849 {
850 // current register allocation can't handle it if a value participates in
851 // multiple constraints
852 for (ValueRef::Iterator it = cst->src[s].iterator(); !it.end(); it.next()) {
853 Instruction *insn = it.get()->getInsn();
854 if (insn != cst)
855 return true;
856 }
857
858 // can start at s + 1 because detectConflict is called on all sources
859 for (int c = s + 1; cst->srcExists(c); ++c)
860 if (cst->getSrc(c) == cst->getSrc(s))
861 return true;
862
863 Instruction *defi = cst->getSrc(s)->getInsn();
864
865 return (!defi || defi->constrainedDefs());
866 }
867
868 void
869 RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
870 {
871 Instruction *cst;
872 int d;
873
874 // first, look for an existing identical constraint op
875 for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
876 cst = reinterpret_cast<Instruction *>(it.get());
877 if (!i->bb->dominatedBy(cst->bb))
878 break;
879 for (d = 0; d < n; ++d)
880 if (cst->getSrc(d) != i->getSrc(d + s))
881 break;
882 if (d >= n) {
883 for (d = 0; d < n; ++d, ++s)
884 i->setSrc(s, cst->getDef(d));
885 return;
886 }
887 }
888 cst = new_Instruction(func, OP_CONSTRAINT, i->dType);
889
890 for (d = 0; d < n; ++s, ++d) {
891 cst->setDef(d, new_LValue(func, FILE_GPR));
892 cst->setSrc(d, i->getSrc(s));
893 i->setSrc(s, cst->getDef(d));
894 }
895 i->bb->insertBefore(i, cst);
896
897 constrList.insert(cst);
898 }
899
900 // Add a dummy use of the pointer source of >= 8 byte loads after the load
901 // to prevent it from being assigned a register which overlapping the load's
902 // destination, which would produce random corruptions.
903 void
904 RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
905 {
906 Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
907 hzd->setSrc(0, src->get());
908 i->bb->insertAfter(i, hzd);
909
910 }
911
912 // Insert constraint markers for instructions whose multiple sources must be
913 // located in consecutive registers.
914 bool
915 RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
916 {
917 TexInstruction *tex;
918 Instruction *next;
919 int s, n, size;
920
921 for (Instruction *i = bb->getEntry(); i; i = next) {
922 next = i->next;
923
924 if ((tex = i->asTex())) {
925 textureMask(tex);
926
927 // FIXME: this is target specific
928 if (tex->op == OP_TXQ) {
929 s = tex->srcCount(0xff);
930 n = 0;
931 } else {
932 s = tex->tex.target.getArgCount();
933 if (!tex->tex.target.isArray() &&
934 (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
935 ++s;
936 n = tex->srcCount(0xff) - s;
937 assert(n <= 4);
938 }
939
940 if (s > 1)
941 addConstraint(i, 0, s);
942 if (n > 1)
943 addConstraint(i, s, n);
944 } else
945 if (i->op == OP_EXPORT || i->op == OP_STORE) {
946 for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
947 assert(i->srcExists(s));
948 size -= i->getSrc(s)->reg.size;
949 }
950 if ((s - 1) > 1)
951 addConstraint(i, 1, s - 1);
952 } else
953 if (i->op == OP_LOAD) {
954 if (i->src[0].isIndirect(0) && typeSizeof(i->dType) >= 8)
955 addHazard(i, i->src[0].getIndirect(0));
956 }
957 }
958 return true;
959 }
960
961 // Insert extra moves so that, if multiple register constraints on a value are
962 // in conflict, these conflicts can be resolved.
963 bool
964 RegAlloc::InsertConstraintsPass::insertConstraintMoves()
965 {
966 for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
967 Instruction *cst = reinterpret_cast<Instruction *>(it.get());
968
969 for (int s = 0; cst->srcExists(s); ++s) {
970 if (!detectConflict(cst, s))
971 continue;
972 Instruction *mov = new_Instruction(func, OP_MOV,
973 typeOfSize(cst->src[s].getSize()));
974 mov->setSrc(0, cst->getSrc(s));
975 mov->setDef(0, new_LValue(func, FILE_GPR));
976 cst->setSrc(s, mov->getDef(0));
977
978 cst->bb->insertBefore(cst, mov);
979 }
980 }
981 return true;
982 }
983
984 } // namespace nv50_ir