nv50/ir: Take into account function args in the live range calculation code.
[mesa.git] / src / gallium / drivers / nv50 / codegen / nv50_ir_ra.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir.h"
24 #include "nv50_ir_target.h"
25
26 #include "nv50/nv50_debug.h"
27
28 namespace nv50_ir {
29
30 #define MAX_REGISTER_FILE_SIZE 256
31
32 class RegisterSet
33 {
34 public:
35 RegisterSet();
36 RegisterSet(const Target *);
37
38 void init(const Target *);
39 void reset(); // reset allocation status, but not max assigned regs
40
41 void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
42 void intersect(DataFile f, const RegisterSet *);
43
44 bool assign(Value **, int nr);
45 void release(const Value *);
46 bool occupy(const Value *);
47
48 int getMaxAssigned(DataFile f) const { return fill[f]; }
49
50 void print() const;
51
52 private:
53 uint32_t bits[FILE_ADDRESS + 1][(MAX_REGISTER_FILE_SIZE + 31) / 32];
54
55 int unit[FILE_ADDRESS + 1]; // log2 of allocation granularity
56
57 int last[FILE_ADDRESS + 1];
58 int fill[FILE_ADDRESS + 1];
59 };
60
61 void
62 RegisterSet::reset()
63 {
64 memset(bits, 0, sizeof(bits));
65 }
66
67 RegisterSet::RegisterSet()
68 {
69 reset();
70 }
71
72 void
73 RegisterSet::init(const Target *targ)
74 {
75 for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
76 DataFile f = static_cast<DataFile>(rf);
77 last[rf] = targ->getFileSize(f) - 1;
78 unit[rf] = targ->getFileUnit(f);
79 fill[rf] = -1;
80 assert(last[rf] < MAX_REGISTER_FILE_SIZE);
81 }
82 }
83
84 RegisterSet::RegisterSet(const Target *targ)
85 {
86 reset();
87 init(targ);
88 }
89
90 void
91 RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
92 {
93 for (int i = 0; i < (last[f] + 31) / 32; ++i)
94 bits[f][i] = (bits[f][i] | lock) & ~unlock;
95 }
96
97 void
98 RegisterSet::intersect(DataFile f, const RegisterSet *set)
99 {
100 for (int i = 0; i < (last[f] + 31) / 32; ++i)
101 bits[f][i] |= set->bits[f][i];
102 }
103
104 void
105 RegisterSet::print() const
106 {
107 INFO("GPR:");
108 for (int i = 0; i < (last[FILE_GPR] + 31) / 32; ++i)
109 INFO(" %08x", bits[FILE_GPR][i]);
110 INFO("\n");
111 }
112
113 bool
114 RegisterSet::assign(Value **def, int nr)
115 {
116 DataFile f = def[0]->reg.file;
117 int n = nr;
118 if (n == 3)
119 n = 4;
120 int s = (n * def[0]->reg.size) >> unit[f];
121 uint32_t m = (1 << s) - 1;
122
123 int id = last[f] + 1;
124 int i;
125
126 for (i = 0; (i * 32) < last[f]; ++i) {
127 if (bits[f][i] == 0xffffffff)
128 continue;
129
130 for (id = 0; id < 32; id += s)
131 if (!(bits[f][i] & (m << id)))
132 break;
133 if (id < 32)
134 break;
135 }
136 id += i * 32;
137 if (id > last[f])
138 return false;
139
140 bits[f][id / 32] |= m << (id % 32);
141
142 if (id + (s - 1) > fill[f])
143 fill[f] = id + (s - 1);
144
145 for (i = 0; i < nr; ++i, ++id)
146 if (!def[i]->livei.isEmpty()) // XXX: really increased id if empty ?
147 def[i]->reg.data.id = id;
148 return true;
149 }
150
151 bool
152 RegisterSet::occupy(const Value *val)
153 {
154 int id = val->reg.data.id;
155 unsigned int f = val->reg.file;
156
157 uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
158
159 if (id < 0 || bits[f][id / 32] & m << (id % 32))
160 return false;
161
162 INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %x\n", f, id, m);
163
164 bits[f][id / 32] |= m << (id % 32);
165
166 if (fill[f] < id)
167 fill[f] = id;
168
169 return true;
170 }
171
172 void
173 RegisterSet::release(const Value *val)
174 {
175 int id = val->reg.data.id;
176 if (id < 0)
177 return;
178 unsigned int f = val->reg.file;
179
180 uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
181
182 INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %x\n", f, id, m);
183
184 bits[f][id / 32] &= ~(m << (id % 32));
185 }
186
187 #define JOIN_MASK_PHI (1 << 0)
188 #define JOIN_MASK_UNION (1 << 1)
189 #define JOIN_MASK_MOV (1 << 2)
190 #define JOIN_MASK_TEX (1 << 3)
191 #define JOIN_MASK_CONSTRAINT (1 << 4)
192
193 class RegAlloc
194 {
195 public:
196 RegAlloc(Program *program) : prog(program), sequence(0) { }
197
198 bool exec();
199 bool execFunc();
200
201 private:
202 bool coalesceValues(unsigned int mask);
203 bool linearScan();
204 bool allocateConstrainedValues();
205
206 private:
207 class PhiMovesPass : public Pass {
208 private:
209 virtual bool visit(BasicBlock *);
210 inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
211 };
212
213 class ArgumentMovesPass : public Pass {
214 private:
215 virtual bool visit(BasicBlock *);
216 };
217
218 class BuildIntervalsPass : public Pass {
219 private:
220 virtual bool visit(BasicBlock *);
221 void collectLiveValues(BasicBlock *);
222 void addLiveRange(Value *, const BasicBlock *, int end);
223 };
224
225 class InsertConstraintsPass : public Pass {
226 public:
227 bool exec(Function *func);
228 private:
229 virtual bool visit(BasicBlock *);
230
231 bool insertConstraintMoves();
232
233 void addHazard(Instruction *i, const ValueRef *src);
234 void textureMask(TexInstruction *);
235 void addConstraint(Instruction *, int s, int n);
236 bool detectConflict(Instruction *, int s);
237
238 DLList constrList;
239 };
240
241 bool buildLiveSets(BasicBlock *);
242 void collectLValues(DLList&, bool assignedOnly);
243
244 void insertOrderedTail(DLList&, Value *);
245 inline Instruction *insnBySerial(int);
246
247 private:
248 Program *prog;
249 Function *func;
250
251 // instructions in control flow / chronological order
252 ArrayList insns;
253
254 int sequence; // for manual passes through CFG
255 };
256
257 Instruction *
258 RegAlloc::insnBySerial(int serial)
259 {
260 return reinterpret_cast<Instruction *>(insns.get(serial));
261 }
262
263 void
264 RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
265 const BasicBlock *bb,
266 int end)
267 {
268 Instruction *insn = val->getUniqueInsn();
269
270 if (!insn)
271 insn = bb->getFirst();
272
273 assert(bb->getFirst()->serial <= bb->getExit()->serial);
274 assert(bb->getExit()->serial + 1 >= end);
275
276 int begin = insn->serial;
277 if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
278 begin = bb->getEntry()->serial;
279
280 INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
281 val->id, begin, insn->serial, end);
282
283 if (begin != end) // empty ranges are only added as hazards for fixed regs
284 val->livei.extend(begin, end);
285 }
286
287 bool
288 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
289 {
290 if (b->cfg.incidentCount() <= 1)
291 return false;
292
293 int n = 0;
294 for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
295 if (ei.getType() == Graph::Edge::TREE ||
296 ei.getType() == Graph::Edge::FORWARD)
297 ++n;
298 return (n == 2);
299 }
300
301 // For each operand of each PHI in b, generate a new value by inserting a MOV
302 // at the end of the block it is coming from and replace the operand with its
303 // result. This eliminates liveness conflicts and enables us to let values be
304 // copied to the right register if such a conflict exists nonetheless.
305 //
306 // These MOVs are also crucial in making sure the live intervals of phi srces
307 // are extended until the end of the loop, since they are not included in the
308 // live-in sets.
309 bool
310 RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
311 {
312 Instruction *phi, *mov;
313 BasicBlock *pb, *pn;
314
315 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
316 pb = pn = BasicBlock::get(ei.getNode());
317 assert(pb);
318
319 if (needNewElseBlock(bb, pb)) {
320 pn = new BasicBlock(func);
321
322 // deletes an edge, iterator is invalid after this:
323 pb->cfg.detach(&bb->cfg);
324 pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
325 pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); // XXX: check order !
326
327 assert(pb->getExit()->op != OP_CALL);
328 if (pb->getExit()->asFlow()->target.bb == bb)
329 pb->getExit()->asFlow()->target.bb = pn;
330 break;
331 }
332 }
333
334 // insert MOVs (phi->src(j) should stem from j-th in-BB)
335 int j = 0;
336 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
337 pb = BasicBlock::get(ei.getNode());
338 if (!pb->isTerminated())
339 pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
340
341 for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
342 mov = new_Instruction(func, OP_MOV, TYPE_U32);
343
344 mov->setSrc(0, phi->getSrc(j));
345 mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
346 phi->setSrc(j, mov->getDef(0));
347
348 pb->insertBefore(pb->getExit(), mov);
349 }
350 ++j;
351 }
352
353 return true;
354 }
355
356 bool
357 RegAlloc::ArgumentMovesPass::visit(BasicBlock *bb)
358 {
359 // Bind function call inputs/outputs to the same physical register
360 // the callee uses, inserting moves as appropriate for the case a
361 // conflict arises.
362 for (Instruction *i = bb->getEntry(); i; i = i->next) {
363 FlowInstruction *cal = i->asFlow();
364 if (!cal || cal->op != OP_CALL || cal->builtin)
365 continue;
366 RegisterSet clobberSet(prog->getTarget());
367
368 // Bind input values.
369 for (int s = 0; cal->srcExists(s); ++s) {
370 LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue());
371 tmp->reg.data.id = cal->target.fn->ins[s].rep()->reg.data.id;
372
373 Instruction *mov =
374 new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
375 mov->setDef(0, tmp);
376 mov->setSrc(0, cal->getSrc(s));
377 cal->setSrc(s, tmp);
378
379 bb->insertBefore(cal, mov);
380 }
381
382 // Bind output values.
383 for (int d = 0; cal->defExists(d); ++d) {
384 LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue());
385 tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id;
386
387 Instruction *mov =
388 new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
389 mov->setSrc(0, tmp);
390 mov->setDef(0, cal->getDef(d));
391 cal->setDef(d, tmp);
392
393 bb->insertAfter(cal, mov);
394 clobberSet.occupy(tmp);
395 }
396
397 // Bind clobbered values.
398 for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin();
399 it != cal->target.fn->clobbers.end();
400 ++it) {
401 if (clobberSet.occupy(*it)) {
402 Value *tmp = new_LValue(func, (*it)->asLValue());
403 tmp->reg.data.id = (*it)->reg.data.id;
404 cal->setDef(cal->defCount(), tmp);
405 }
406 }
407 }
408
409 // Update the clobber set of the function.
410 if (BasicBlock::get(func->cfgExit) == bb) {
411 func->buildDefSets();
412 for (unsigned int i = 0; i < bb->defSet.getSize(); ++i)
413 if (bb->defSet.test(i))
414 func->clobbers.push_back(func->getLValue(i));
415 }
416
417 return true;
418 }
419
420 // Build the set of live-in variables of bb.
421 bool
422 RegAlloc::buildLiveSets(BasicBlock *bb)
423 {
424 Function *f = bb->getFunction();
425 BasicBlock *bn;
426 Instruction *i;
427 unsigned int s, d;
428
429 INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());
430
431 bb->liveSet.allocate(func->allLValues.getSize(), false);
432
433 int n = 0;
434 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
435 bn = BasicBlock::get(ei.getNode());
436 if (bn == bb)
437 continue;
438 if (bn->cfg.visit(sequence))
439 if (!buildLiveSets(bn))
440 return false;
441 if (n++ == 0)
442 bb->liveSet = bn->liveSet;
443 else
444 bb->liveSet |= bn->liveSet;
445 }
446 if (!n && !bb->liveSet.marker)
447 bb->liveSet.fill(0);
448 bb->liveSet.marker = true;
449
450 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
451 INFO("BB:%i live set of out blocks:\n", bb->getId());
452 bb->liveSet.print();
453 }
454
455 // if (!bb->getEntry())
456 // return true;
457
458 if (bb == BasicBlock::get(f->cfgExit)) {
459 for (std::deque<ValueRef>::iterator it = f->outs.begin();
460 it != f->outs.end(); ++it) {
461 assert(it->get()->asLValue());
462 bb->liveSet.set(it->get()->id);
463 }
464 }
465
466 for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
467 for (d = 0; i->defExists(d); ++d)
468 bb->liveSet.clr(i->getDef(d)->id);
469 for (s = 0; i->srcExists(s); ++s)
470 if (i->getSrc(s)->asLValue())
471 bb->liveSet.set(i->getSrc(s)->id);
472 }
473 for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
474 bb->liveSet.clr(i->getDef(0)->id);
475
476 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
477 INFO("BB:%i live set after propagation:\n", bb->getId());
478 bb->liveSet.print();
479 }
480
481 return true;
482 }
483
484 void
485 RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
486 {
487 BasicBlock *bbA = NULL, *bbB = NULL;
488
489 if (bb->cfg.outgoingCount()) {
490 // trickery to save a loop of OR'ing liveSets
491 // aliasing works fine with BitSet::setOr
492 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
493 if (ei.getType() == Graph::Edge::DUMMY)
494 continue;
495 if (bbA) {
496 bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
497 bbA = bb;
498 } else {
499 bbA = bbB;
500 }
501 bbB = BasicBlock::get(ei.getNode());
502 }
503 bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
504 } else
505 if (bb->cfg.incidentCount()) {
506 bb->liveSet.fill(0);
507 }
508 }
509
510 bool
511 RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
512 {
513 collectLiveValues(bb);
514
515 INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());
516
517 // go through out blocks and delete phi sources that do not originate from
518 // the current block from the live set
519 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
520 BasicBlock *out = BasicBlock::get(ei.getNode());
521
522 for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
523 bb->liveSet.clr(i->getDef(0)->id);
524
525 for (int s = 0; i->srcExists(s); ++s) {
526 assert(i->src(s).getInsn());
527 if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
528 bb->liveSet.set(i->getSrc(s)->id);
529 else
530 bb->liveSet.clr(i->getSrc(s)->id);
531 }
532 }
533 }
534
535 // remaining live-outs are live until end
536 if (bb->getExit()) {
537 for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
538 if (bb->liveSet.test(j))
539 addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
540 }
541
542 for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
543 for (int d = 0; i->defExists(d); ++d) {
544 bb->liveSet.clr(i->getDef(d)->id);
545 if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
546 i->getDef(d)->livei.extend(i->serial, i->serial);
547 }
548
549 for (int s = 0; i->srcExists(s); ++s) {
550 if (!i->getSrc(s)->asLValue())
551 continue;
552 if (!bb->liveSet.test(i->getSrc(s)->id)) {
553 bb->liveSet.set(i->getSrc(s)->id);
554 addLiveRange(i->getSrc(s), bb, i->serial);
555 }
556 }
557 }
558
559 if (bb == BasicBlock::get(func->cfg.getRoot())) {
560 for (std::deque<ValueDef>::iterator it = func->ins.begin();
561 it != func->ins.end(); ++it) {
562 if (it->get()->reg.data.id >= 0) // add hazard for fixed regs
563 it->get()->livei.extend(0, 1);
564 }
565 }
566
567 return true;
568 }
569
570 bool
571 RegAlloc::coalesceValues(unsigned int mask)
572 {
573 int c, n;
574
575 for (n = 0; n < insns.getSize(); ++n) {
576 Instruction *i;
577 Instruction *insn = insnBySerial(n);
578
579 switch (insn->op) {
580 case OP_PHI:
581 if (!(mask & JOIN_MASK_PHI))
582 break;
583 for (c = 0; insn->srcExists(c); ++c)
584 if (!insn->getDef(0)->coalesce(insn->getSrc(c), false)) {
585 ERROR("failed to coalesce phi operands\n");
586 return false;
587 }
588 break;
589 case OP_UNION:
590 if (!(mask & JOIN_MASK_UNION))
591 break;
592 for (c = 0; insn->srcExists(c); ++c)
593 insn->getDef(0)->coalesce(insn->getSrc(c), true);
594 break;
595 case OP_CONSTRAINT:
596 if (!(mask & JOIN_MASK_CONSTRAINT))
597 break;
598 for (c = 0; c < 4 && insn->srcExists(c); ++c)
599 insn->getDef(c)->coalesce(insn->getSrc(c), true);
600 break;
601 case OP_MOV:
602 if (!(mask & JOIN_MASK_MOV))
603 break;
604 i = NULL;
605 if (!insn->getDef(0)->uses.empty())
606 i = insn->getDef(0)->uses.front()->getInsn();
607 // if this is a contraint-move there will only be a single use
608 if (i && i->op == OP_CONSTRAINT)
609 break;
610 i = insn->getSrc(0)->getUniqueInsn();
611 if (i && !i->constrainedDefs())
612 insn->getDef(0)->coalesce(insn->getSrc(0), false);
613 break;
614 case OP_TEX:
615 case OP_TXB:
616 case OP_TXL:
617 case OP_TXF:
618 case OP_TXQ:
619 case OP_TXD:
620 case OP_TXG:
621 case OP_TEXCSAA:
622 if (!(mask & JOIN_MASK_TEX))
623 break;
624 for (c = 0; c < 4 && insn->srcExists(c); ++c)
625 insn->getDef(c)->coalesce(insn->getSrc(c), true);
626 break;
627 default:
628 break;
629 }
630 }
631 return true;
632 }
633
634 void
635 RegAlloc::insertOrderedTail(DLList &list, Value *val)
636 {
637 // we insert the live intervals in order, so this should be short
638 DLList::Iterator iter = list.revIterator();
639 const int begin = val->livei.begin();
640 for (; !iter.end(); iter.next()) {
641 if (reinterpret_cast<Value *>(iter.get())->livei.begin() <= begin)
642 break;
643 }
644 iter.insert(val);
645 }
646
647 static void
648 checkList(DLList &list)
649 {
650 Value *prev = NULL;
651 Value *next = NULL;
652
653 for (DLList::Iterator iter = list.iterator(); !iter.end(); iter.next()) {
654 next = Value::get(iter);
655 assert(next);
656 if (prev) {
657 assert(prev->livei.begin() <= next->livei.begin());
658 }
659 assert(next->join == next);
660 prev = next;
661 }
662 }
663
664 void
665 RegAlloc::collectLValues(DLList &list, bool assignedOnly)
666 {
667 for (int n = 0; n < insns.getSize(); ++n) {
668 Instruction *i = insnBySerial(n);
669
670 for (int d = 0; i->defExists(d); ++d)
671 if (!i->getDef(d)->livei.isEmpty())
672 if (!assignedOnly || i->getDef(d)->reg.data.id >= 0)
673 insertOrderedTail(list, i->getDef(d));
674 }
675 checkList(list);
676 }
677
678 bool
679 RegAlloc::allocateConstrainedValues()
680 {
681 Value *defs[4];
682 RegisterSet regSet[4];
683 DLList regVals;
684
685 INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: allocating constrained values\n");
686
687 collectLValues(regVals, true);
688
689 for (int c = 0; c < 4; ++c)
690 regSet[c].init(prog->getTarget());
691
692 for (int n = 0; n < insns.getSize(); ++n) {
693 Instruction *i = insnBySerial(n);
694
695 const int vecSize = i->defCount(0xf);
696 if (vecSize < 2)
697 continue;
698 assert(vecSize <= 4);
699
700 for (int c = 0; c < vecSize; ++c)
701 defs[c] = i->def(c).rep();
702
703 if (defs[0]->reg.data.id >= 0) {
704 for (int c = 1; c < vecSize; ++c) {
705 assert(defs[c]->reg.data.id >= 0);
706 }
707 continue;
708 }
709
710 for (int c = 0; c < vecSize; ++c) {
711 uint32_t mask;
712 regSet[c].reset();
713
714 for (DLList::Iterator it = regVals.iterator(); !it.end(); it.next()) {
715 Value *rVal = Value::get(it);
716 if (rVal->reg.data.id >= 0 && rVal->livei.overlaps(defs[c]->livei))
717 regSet[c].occupy(rVal);
718 }
719 mask = 0x11111111;
720 if (vecSize == 2) // granularity is 2 instead of 4
721 mask |= 0x11111111 << 2;
722 regSet[c].periodicMask(defs[0]->reg.file, 0, ~(mask << c));
723
724 if (!defs[c]->livei.isEmpty())
725 insertOrderedTail(regVals, defs[c]);
726 }
727 for (int c = 1; c < vecSize; ++c)
728 regSet[0].intersect(defs[0]->reg.file, &regSet[c]);
729
730 if (!regSet[0].assign(&defs[0], vecSize)) // TODO: spilling
731 return false;
732 }
733 for (int c = 0; c < 4; c += 2)
734 if (regSet[c].getMaxAssigned(FILE_GPR) > prog->maxGPR)
735 prog->maxGPR = regSet[c].getMaxAssigned(FILE_GPR);
736 return true;
737 }
738
739 bool
740 RegAlloc::linearScan()
741 {
742 Value *cur, *val;
743 DLList unhandled, active, inactive;
744 RegisterSet f(prog->getTarget()), free(prog->getTarget());
745
746 INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: linear scan\n");
747
748 collectLValues(unhandled, false);
749
750 for (DLList::Iterator cI = unhandled.iterator(); !cI.end();) {
751 cur = Value::get(cI);
752 cI.erase();
753
754 for (DLList::Iterator aI = active.iterator(); !aI.end();) {
755 val = Value::get(aI);
756 if (val->livei.end() <= cur->livei.begin()) {
757 free.release(val);
758 aI.erase();
759 } else
760 if (!val->livei.contains(cur->livei.begin())) {
761 free.release(val);
762 aI.moveToList(inactive);
763 } else {
764 aI.next();
765 }
766 }
767
768 for (DLList::Iterator iI = inactive.iterator(); !iI.end();) {
769 val = Value::get(iI);
770 if (val->livei.end() <= cur->livei.begin()) {
771 iI.erase();
772 } else
773 if (val->livei.contains(cur->livei.begin())) {
774 free.occupy(val);
775 iI.moveToList(active);
776 } else {
777 iI.next();
778 }
779 }
780 f = free;
781
782 for (DLList::Iterator iI = inactive.iterator(); !iI.end(); iI.next()) {
783 val = Value::get(iI);
784 if (val->livei.overlaps(cur->livei))
785 f.occupy(val);
786 }
787
788 for (DLList::Iterator uI = unhandled.iterator(); !uI.end(); uI.next()) {
789 val = Value::get(uI);
790 if (val->reg.data.id >= 0 && val->livei.overlaps(cur->livei))
791 f.occupy(val);
792 }
793
794 if (cur->reg.data.id < 0) {
795 bool spill = !f.assign(&cur, 1);
796 if (spill) {
797 ERROR("out of registers of file %u\n", cur->reg.file);
798 abort();
799 }
800 }
801 free.occupy(cur);
802 active.insert(cur);
803 }
804
805 if (f.getMaxAssigned(FILE_GPR) > prog->maxGPR)
806 prog->maxGPR = f.getMaxAssigned(FILE_GPR);
807 if (free.getMaxAssigned(FILE_GPR) > prog->maxGPR)
808 prog->maxGPR = free.getMaxAssigned(FILE_GPR);
809 return true;
810 }
811
812 bool
813 RegAlloc::exec()
814 {
815 for (IteratorRef it = prog->calls.iteratorDFS(false);
816 !it->end(); it->next()) {
817 func = Function::get(reinterpret_cast<Graph::Node *>(it->get()));
818 if (!execFunc())
819 return false;
820 }
821 return true;
822 }
823
824 bool
825 RegAlloc::execFunc()
826 {
827 InsertConstraintsPass insertConstr;
828 PhiMovesPass insertPhiMoves;
829 ArgumentMovesPass insertArgMoves;
830 BuildIntervalsPass buildIntervals;
831
832 unsigned int i;
833 bool ret;
834
835 ret = insertConstr.exec(func);
836 if (!ret)
837 goto out;
838
839 ret = insertPhiMoves.run(func);
840 if (!ret)
841 goto out;
842
843 ret = insertArgMoves.run(func, true);
844 if (!ret)
845 goto out;
846
847 for (sequence = func->cfg.nextSequence(), i = 0;
848 ret && i <= func->loopNestingBound;
849 sequence = func->cfg.nextSequence(), ++i)
850 ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
851 if (!ret)
852 goto out;
853
854 func->orderInstructions(this->insns);
855
856 ret = buildIntervals.run(func);
857 if (!ret)
858 goto out;
859
860 ret = coalesceValues(JOIN_MASK_PHI);
861 if (!ret)
862 goto out;
863 switch (prog->getTarget()->getChipset() & 0xf0) {
864 case 0x50:
865 ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_TEX);
866 break;
867 case 0xc0:
868 ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_CONSTRAINT);
869 break;
870 default:
871 break;
872 }
873 if (!ret)
874 goto out;
875 ret = coalesceValues(JOIN_MASK_MOV);
876 if (!ret)
877 goto out;
878
879 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
880 func->print();
881 func->printLiveIntervals();
882 }
883
884 ret = allocateConstrainedValues() && linearScan();
885 if (!ret)
886 goto out;
887
888 out:
889 // TODO: should probably call destructor on LValues later instead
890 for (ArrayList::Iterator it = func->allLValues.iterator();
891 !it.end(); it.next())
892 reinterpret_cast<LValue *>(it.get())->livei.clear();
893
894 return ret;
895 }
896
897 bool Program::registerAllocation()
898 {
899 RegAlloc ra(this);
900 return ra.exec();
901 }
902
903 bool
904 RegAlloc::InsertConstraintsPass::exec(Function *ir)
905 {
906 constrList.clear();
907
908 bool ret = run(ir, true, true);
909 if (ret)
910 ret = insertConstraintMoves();
911 return ret;
912 }
913
914 // TODO: make part of texture insn
915 void
916 RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
917 {
918 Value *def[4];
919 int c, k, d;
920 uint8_t mask = 0;
921
922 for (d = 0, k = 0, c = 0; c < 4; ++c) {
923 if (!(tex->tex.mask & (1 << c)))
924 continue;
925 if (tex->getDef(k)->refCount()) {
926 mask |= 1 << c;
927 def[d++] = tex->getDef(k);
928 }
929 ++k;
930 }
931 tex->tex.mask = mask;
932
933 #if 0 // reorder or set the unused ones NULL ?
934 for (c = 0; c < 4; ++c)
935 if (!(tex->tex.mask & (1 << c)))
936 def[d++] = tex->getDef(c);
937 #endif
938 for (c = 0; c < d; ++c)
939 tex->setDef(c, def[c]);
940 #if 1
941 for (; c < 4; ++c)
942 tex->setDef(c, NULL);
943 #endif
944 }
945
946 bool
947 RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
948 {
949 Value *v = cst->getSrc(s);
950
951 // current register allocation can't handle it if a value participates in
952 // multiple constraints
953 for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) {
954 if (cst != (*it)->getInsn())
955 return true;
956 }
957
958 // can start at s + 1 because detectConflict is called on all sources
959 for (int c = s + 1; cst->srcExists(c); ++c)
960 if (v == cst->getSrc(c))
961 return true;
962
963 Instruction *defi = v->getInsn();
964
965 return (!defi || defi->constrainedDefs());
966 }
967
968 void
969 RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
970 {
971 Instruction *cst;
972 int d;
973
974 // first, look for an existing identical constraint op
975 for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
976 cst = reinterpret_cast<Instruction *>(it.get());
977 if (!i->bb->dominatedBy(cst->bb))
978 break;
979 for (d = 0; d < n; ++d)
980 if (cst->getSrc(d) != i->getSrc(d + s))
981 break;
982 if (d >= n) {
983 for (d = 0; d < n; ++d, ++s)
984 i->setSrc(s, cst->getDef(d));
985 return;
986 }
987 }
988 cst = new_Instruction(func, OP_CONSTRAINT, i->dType);
989
990 for (d = 0; d < n; ++s, ++d) {
991 cst->setDef(d, new_LValue(func, FILE_GPR));
992 cst->setSrc(d, i->getSrc(s));
993 i->setSrc(s, cst->getDef(d));
994 }
995 i->bb->insertBefore(i, cst);
996
997 constrList.insert(cst);
998 }
999
1000 // Add a dummy use of the pointer source of >= 8 byte loads after the load
1001 // to prevent it from being assigned a register which overlapping the load's
1002 // destination, which would produce random corruptions.
1003 void
1004 RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
1005 {
1006 Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
1007 hzd->setSrc(0, src->get());
1008 i->bb->insertAfter(i, hzd);
1009
1010 }
1011
1012 // Insert constraint markers for instructions whose multiple sources must be
1013 // located in consecutive registers.
1014 bool
1015 RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
1016 {
1017 TexInstruction *tex;
1018 Instruction *next;
1019 int s, n, size;
1020
1021 for (Instruction *i = bb->getEntry(); i; i = next) {
1022 next = i->next;
1023
1024 if ((tex = i->asTex())) {
1025 textureMask(tex);
1026
1027 // FIXME: this is target specific
1028 if (tex->op == OP_TXQ) {
1029 s = tex->srcCount(0xff);
1030 n = 0;
1031 } else {
1032 s = tex->tex.target.getArgCount();
1033 if (!tex->tex.target.isArray() &&
1034 (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
1035 ++s;
1036 if (tex->op == OP_TXD && tex->tex.useOffsets)
1037 ++s;
1038 n = tex->srcCount(0xff) - s;
1039 assert(n <= 4);
1040 }
1041
1042 if (s > 1)
1043 addConstraint(i, 0, s);
1044 if (n > 1)
1045 addConstraint(i, s, n);
1046 } else
1047 if (i->op == OP_EXPORT || i->op == OP_STORE) {
1048 for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
1049 assert(i->srcExists(s));
1050 size -= i->getSrc(s)->reg.size;
1051 }
1052 if ((s - 1) > 1)
1053 addConstraint(i, 1, s - 1);
1054 } else
1055 if (i->op == OP_LOAD) {
1056 if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
1057 addHazard(i, i->src(0).getIndirect(0));
1058 }
1059 }
1060 return true;
1061 }
1062
1063 // Insert extra moves so that, if multiple register constraints on a value are
1064 // in conflict, these conflicts can be resolved.
1065 bool
1066 RegAlloc::InsertConstraintsPass::insertConstraintMoves()
1067 {
1068 for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
1069 Instruction *cst = reinterpret_cast<Instruction *>(it.get());
1070
1071 for (int s = 0; cst->srcExists(s); ++s) {
1072 if (!detectConflict(cst, s))
1073 continue;
1074 Instruction *mov = new_Instruction(func, OP_MOV,
1075 typeOfSize(cst->src(s).getSize()));
1076 mov->setSrc(0, cst->getSrc(s));
1077 mov->setDef(0, new_LValue(func, FILE_GPR));
1078 cst->setSrc(s, mov->getDef(0));
1079
1080 cst->bb->insertBefore(cst, mov);
1081 }
1082 }
1083 return true;
1084 }
1085
1086 } // namespace nv50_ir