nv50/ir/ra: fix confusion with conditional RegisterSet::occupy
[mesa.git] / src / gallium / drivers / nv50 / codegen / nv50_ir_ra.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir.h"
24 #include "nv50_ir_target.h"
25
26 #include <stack>
27 #include <limits>
28
29 namespace nv50_ir {
30
31 #define MAX_REGISTER_FILE_SIZE 256
32
// Tracks which registers of each register file are currently occupied.
// Allocation is performed in units of (1 << unit[f]) bytes; each unit maps
// to one bit in the per-file BitSet.
class RegisterSet
{
public:
   RegisterSet(const Target *);

   void init(const Target *);
   // Clear the occupancy bits of file f; optionally reset the high-water mark.
   void reset(DataFile, bool resetMax = false);

   void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
   void intersect(DataFile f, const RegisterSet *);

   // Find and claim a free range of 'size' units; returns false if none left.
   bool assign(int32_t& reg, DataFile f, unsigned int size);
   void release(DataFile f, int32_t reg, unsigned int size);
   void occupy(DataFile f, int32_t reg, unsigned int size);
   void occupy(const Value *);
   void occupyMask(DataFile f, int32_t reg, uint8_t mask);
   bool isOccupied(DataFile f, int32_t reg, unsigned int size) const;
   bool testOccupy(const Value *);
   // Occupy [reg, reg + size) only if the whole range is free; returns success.
   bool testOccupy(DataFile f, int32_t reg, unsigned int size);

   // Highest unit index ever occupied in f (-1 if none).
   inline int getMaxAssigned(DataFile f) const { return fill[f]; }

   inline unsigned int getFileSize(DataFile f, uint8_t regSize) const
   {
      // Pre-nvc0 chips can only reach half of the GPR file with 16-bit regs.
      if (restrictedGPR16Range && f == FILE_GPR && regSize == 2)
         return (last[f] + 1) / 2;
      return last[f] + 1;
   }

   // Convert a size in bytes to allocation units of file f.
   inline unsigned int units(DataFile f, unsigned int size) const
   {
      return size >> unit[f];
   }
   // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary)
   inline unsigned int idToBytes(const Value *v) const
   {
      return v->reg.data.id * MIN2(v->reg.size, 4);
   }
   inline unsigned int idToUnits(const Value *v) const
   {
      return units(v->reg.file, idToBytes(v));
   }
   inline int bytesToId(Value *v, unsigned int bytes) const
   {
      if (v->reg.size < 4)
         return units(v->reg.file, bytes);
      return bytes / 4;
   }
   inline int unitsToId(DataFile f, int u, uint8_t size) const
   {
      if (u < 0)
         return -1;
      return (size < 4) ? u : ((u << unit[f]) / 4);
   }

   void print() const;

private:
   BitSet bits[LAST_REGISTER_FILE + 1]; // one bit per allocation unit

   int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity

   int last[LAST_REGISTER_FILE + 1]; // highest valid unit index per file
   int fill[LAST_REGISTER_FILE + 1]; // high-water mark of occupied units

   const bool restrictedGPR16Range; // true for chipsets < 0xc0
};
100
// Mark all registers of file f as free; if resetMax is set, also forget the
// high-water mark reported by getMaxAssigned().
void
RegisterSet::reset(DataFile f, bool resetMax)
{
   bits[f].fill(0);
   if (resetMax)
      fill[f] = -1;
}
108
// Query per-file size and allocation granularity from the target and size
// the occupancy bitsets accordingly.
void
RegisterSet::init(const Target *targ)
{
   for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
      DataFile f = static_cast<DataFile>(rf);
      last[rf] = targ->getFileSize(f) - 1;
      unit[rf] = targ->getFileUnit(f);
      fill[rf] = -1; // nothing assigned yet
      assert(last[rf] < MAX_REGISTER_FILE_SIZE);
      bits[rf].allocate(last[rf] + 1, true);
   }
}
121
// Build a register set for the given target; all files start out empty.
RegisterSet::RegisterSet(const Target *targ)
  : restrictedGPR16Range(targ->getChipset() < 0xc0)
{
   init(targ);
   for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i)
      reset(static_cast<DataFile>(i));
}
129
// Apply a repeating 32-unit lock/unlock pattern to file f (used to reserve
// registers with a fixed periodic layout).
void
RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
{
   bits[f].periodicMask32(lock, unlock);
}
135
// Merge the occupancy of 'set' into this one: a register remains free only
// if it is free in both sets. The name refers to intersecting the sets of
// *free* registers; the implementation is an OR of the occupied bits.
void
RegisterSet::intersect(DataFile f, const RegisterSet *set)
{
   bits[f] |= set->bits[f];
}
141
// Debug dump of the GPR occupancy bitset.
void
RegisterSet::print() const
{
   INFO("GPR:");
   bits[FILE_GPR].print();
   INFO("\n");
}
149
// Allocate a free range of 'size' units in file f, returning its start in
// reg. NOTE(review): BitSet::findFreeRange appears to also mark the range
// as occupied — confirm in nv50_ir_util before relying on it.
bool
RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size)
{
   reg = bits[f].findFreeRange(size);
   if (reg < 0)
      return false;
   fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
   return true;
}
159
// True if any unit in [reg, reg + size) of file f is currently occupied.
bool
RegisterSet::isOccupied(DataFile f, int32_t reg, unsigned int size) const
{
   return bits[f].testRange(reg, size);
}
165
// Occupy the units covered by a value that already has a register assigned
// (v->reg.data.id must be valid).
void
RegisterSet::occupy(const Value *v)
{
   occupy(v->reg.file, idToUnits(v), v->reg.size >> unit[v->reg.file]);
}
171
// Occupy the units selected by mask, shifted to reg's position within its
// 32-unit group (reg & ~31 is the aligned base the mask is applied to).
void
RegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask)
{
   bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32));
}
177
// Unconditionally occupy [reg, reg + size) units of file f and update the
// high-water mark.
void
RegisterSet::occupy(DataFile f, int32_t reg, unsigned int size)
{
   bits[f].setRange(reg, size);

   INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size);

   fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
}
187
// Try to occupy the units of an already-assigned value; fails without side
// effects if any of them is taken.
bool
RegisterSet::testOccupy(const Value *v)
{
   return testOccupy(v->reg.file,
                     idToUnits(v), v->reg.size >> unit[v->reg.file]);
}
194
195 bool
196 RegisterSet::testOccupy(DataFile f, int32_t reg, unsigned int size)
197 {
198 if (isOccupied(f, reg, size))
199 return false;
200 occupy(f, reg, size);
201 return true;
202 }
203
// Free [reg, reg + size) units of file f (the high-water mark is kept).
void
RegisterSet::release(DataFile f, int32_t reg, unsigned int size)
{
   bits[f].clrRange(reg, size);

   INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size);
}
211
// Driver for register allocation: runs the preparatory passes (phi operand
// moves, call argument binding, liveness and interval construction) and
// then invokes graph-colouring allocation per function.
class RegAlloc
{
public:
   RegAlloc(Program *program) : prog(program), sequence(0) { }

   bool exec();
   bool execFunc();

private:
   // Inserts MOVs for phi operands in predecessor blocks so phi sources get
   // their own values (eliminates liveness conflicts across edges).
   class PhiMovesPass : public Pass {
   private:
      virtual bool visit(BasicBlock *);
      inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
   };

   // Binds OP_CALL inputs/outputs to the callee's fixed registers via MOVs.
   class ArgumentMovesPass : public Pass {
   private:
      virtual bool visit(BasicBlock *);
   };

   // Computes live intervals for all values from the per-block live sets.
   class BuildIntervalsPass : public Pass {
   private:
      virtual bool visit(BasicBlock *);
      void collectLiveValues(BasicBlock *);
      void addLiveRange(Value *, const BasicBlock *, int end);
   };

   // Inserts moves/compound constraints required by instructions that need
   // their operands in consecutive registers (e.g. texturing, merges).
   class InsertConstraintsPass : public Pass {
   public:
      bool exec(Function *func);
   private:
      virtual bool visit(BasicBlock *);

      bool insertConstraintMoves();

      void condenseDefs(Instruction *);
      void condenseSrcs(Instruction *, const int first, const int last);

      void addHazard(Instruction *i, const ValueRef *src);
      void textureMask(TexInstruction *);
      void addConstraint(Instruction *, int s, int n);
      bool detectConflict(Instruction *, int s);

      // target specific functions, TODO: put in subclass or Target
      void texConstraintNV50(TexInstruction *);
      void texConstraintNVC0(TexInstruction *);
      void texConstraintNVE0(TexInstruction *);

      std::list<Instruction *> constrList;

      const Target *targ;
   };

   bool buildLiveSets(BasicBlock *);

private:
   Program *prog;
   Function *func;

   // instructions in control flow / chronological order
   ArrayList insns;

   int sequence; // for manual passes through CFG
};
276
// (defining value, spill slot) pairs produced by the allocator.
typedef std::pair<Value *, Value *> ValuePair;

// Rewrites spilled values into loads/stores against stack slots, reusing
// slots whose previous occupants' intervals no longer overlap.
class SpillCodeInserter
{
public:
   SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { }

   // Insert spill/unspill code for all (value, slot) pairs.
   bool run(const std::list<ValuePair>&);

   Symbol *assignSlot(const Interval&, const unsigned int size);
   inline int32_t getStackSize() const { return stackSize; }

private:
   Function *func;

   struct SpillSlot
   {
      Interval occup;                // union of residents' live intervals
      std::list<Value *> residents;  // needed to recalculate occup
      Symbol *sym;
      int32_t offset;
      inline uint8_t size() const { return sym->reg.size; }
   };
   std::list<SpillSlot> slots;
   int32_t stackSize;
   int32_t stackBase;

   LValue *unspill(Instruction *usei, LValue *, Value *slot);
   void spill(Instruction *defi, Value *slot, LValue *);
};
307
// Extend val's live interval by [begin, end): begin is the serial of val's
// defining instruction if that lies inside bb, otherwise the block entry
// (val is live-in to bb).
void
RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
                                           const BasicBlock *bb,
                                           int end)
{
   Instruction *insn = val->getUniqueInsn();

   if (!insn)
      insn = bb->getFirst();

   assert(bb->getFirst()->serial <= bb->getExit()->serial);
   assert(bb->getExit()->serial + 1 >= end);

   int begin = insn->serial;
   if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
      begin = bb->getEntry()->serial;

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
            val->id, begin, insn->serial, end);

   if (begin != end) // empty ranges are only added as hazards for fixed regs
      val->livei.extend(begin, end);
}
331
332 bool
333 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
334 {
335 if (b->cfg.incidentCount() <= 1)
336 return false;
337
338 int n = 0;
339 for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
340 if (ei.getType() == Graph::Edge::TREE ||
341 ei.getType() == Graph::Edge::FORWARD)
342 ++n;
343 return (n == 2);
344 }
345
// For each operand of each PHI in b, generate a new value by inserting a MOV
// at the end of the block it is coming from and replace the operand with its
// result. This eliminates liveness conflicts and enables us to let values be
// copied to the right register if such a conflict exists nonetheless.
//
// These MOVs are also crucial in making sure the live intervals of phi srces
// are extended until the end of the loop, since they are not included in the
// live-in sets.
bool
RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
{
   Instruction *phi, *mov;
   BasicBlock *pb, *pn;

   std::stack<BasicBlock *> stack;

   // Collect predecessors that need a new block spliced onto their edge
   // (can't mutate the CFG while iterating over it).
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      pb = BasicBlock::get(ei.getNode());
      assert(pb);
      if (needNewElseBlock(bb, pb))
         stack.push(pb);
   }
   // Splice a fresh block pn between pb and bb and retarget pb's branch.
   while (!stack.empty()) {
      pb = stack.top();
      pn = new BasicBlock(func);
      stack.pop();

      pb->cfg.detach(&bb->cfg);
      pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
      pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD);

      assert(pb->getExit()->op != OP_CALL);
      if (pb->getExit()->asFlow()->target.bb == bb)
         pb->getExit()->asFlow()->target.bb = pn;
   }

   // insert MOVs (phi->src(j) should stem from j-th in-BB)
   int j = 0;
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      pb = BasicBlock::get(ei.getNode());
      if (!pb->isTerminated())
         pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));

      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
         mov = new_Instruction(func, OP_MOV, TYPE_U32);

         mov->setSrc(0, phi->getSrc(j));
         mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
         phi->setSrc(j, mov->getDef(0));

         pb->insertBefore(pb->getExit(), mov);
      }
      ++j;
   }

   return true;
}
403
bool
RegAlloc::ArgumentMovesPass::visit(BasicBlock *bb)
{
   // Bind function call inputs/outputs to the same physical register
   // the callee uses, inserting moves as appropriate for the case a
   // conflict arises.
   for (Instruction *i = bb->getEntry(); i; i = i->next) {
      FlowInstruction *cal = i->asFlow();
      if (!cal || cal->op != OP_CALL || cal->builtin)
         continue;
      // Tracks registers already claimed by this call's outputs so that
      // clobbers overlapping an output don't get an extra dummy def.
      RegisterSet clobberSet(prog->getTarget());

      // Bind input values: give each source a fresh value fixed to the
      // callee's input register, fed by a MOV before the call.
      for (int s = 0; cal->srcExists(s); ++s) {
         LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue());
         tmp->reg.data.id = cal->target.fn->ins[s].rep()->reg.data.id;

         Instruction *mov =
            new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
         mov->setDef(0, tmp);
         mov->setSrc(0, cal->getSrc(s));
         cal->setSrc(s, tmp);

         bb->insertBefore(cal, mov);
      }

      // Bind output values: the call defines a value fixed to the callee's
      // output register, copied to the original def by a MOV after the call.
      for (int d = 0; cal->defExists(d); ++d) {
         LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue());
         tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id;

         Instruction *mov =
            new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
         mov->setSrc(0, tmp);
         mov->setDef(0, cal->getDef(d));
         cal->setDef(d, tmp);

         bb->insertAfter(cal, mov);
         clobberSet.occupy(tmp);
      }

      // Bind clobbered values: add dummy defs for callee-clobbered registers
      // not already covered by an output (testOccupy fails on overlap).
      for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin();
           it != cal->target.fn->clobbers.end();
           ++it) {
         if (clobberSet.testOccupy(*it)) {
            Value *tmp = new_LValue(func, (*it)->asLValue());
            tmp->reg.data.id = (*it)->reg.data.id;
            cal->setDef(cal->defCount(), tmp);
         }
      }
   }

   // Update the clobber set of the function.
   if (BasicBlock::get(func->cfgExit) == bb) {
      func->buildDefSets();
      for (unsigned int i = 0; i < bb->defSet.getSize(); ++i)
         if (bb->defSet.test(i))
            func->clobbers.push_back(func->getLValue(i));
   }

   return true;
}
467
// Build the set of live-in variables of bb by recursing over successors
// (depth-first, guarded by the visit sequence number) and propagating their
// live-ins backwards through bb's instructions.
bool
RegAlloc::buildLiveSets(BasicBlock *bb)
{
   Function *f = bb->getFunction();
   BasicBlock *bn;
   Instruction *i;
   unsigned int s, d;

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());

   bb->liveSet.allocate(func->allLValues.getSize(), false);

   // Union the live-ins of all successors; the first successor's set is
   // copied directly unless bb's set already holds data (marker).
   int n = 0;
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      bn = BasicBlock::get(ei.getNode());
      if (bn == bb)
         continue;
      if (bn->cfg.visit(sequence))
         if (!buildLiveSets(bn))
            return false;
      if (n++ || bb->liveSet.marker)
         bb->liveSet |= bn->liveSet;
      else
         bb->liveSet = bn->liveSet;
   }
   if (!n && !bb->liveSet.marker)
      bb->liveSet.fill(0);
   bb->liveSet.marker = true;

   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
      INFO("BB:%i live set of out blocks:\n", bb->getId());
      bb->liveSet.print();
   }

   // if (!bb->getEntry())
   //   return true;

   // Function outputs are live at the exit block.
   if (bb == BasicBlock::get(f->cfgExit)) {
      for (std::deque<ValueRef>::iterator it = f->outs.begin();
           it != f->outs.end(); ++it) {
         assert(it->get()->asLValue());
         bb->liveSet.set(it->get()->id);
      }
   }

   // Walk instructions backwards: defs kill liveness, uses create it.
   for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
      for (d = 0; i->defExists(d); ++d)
         bb->liveSet.clr(i->getDef(d)->id);
      for (s = 0; i->srcExists(s); ++s)
         if (i->getSrc(s)->asLValue())
            bb->liveSet.set(i->getSrc(s)->id);
   }
   // Phi defs are not live-in (their sources are added by the moves pass).
   for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
      bb->liveSet.clr(i->getDef(0)->id);

   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
      INFO("BB:%i live set after propagation:\n", bb->getId());
      bb->liveSet.print();
   }

   return true;
}
531
// Compute bb's live-out set as the union of its successors' live-in sets,
// writing the result into bb->liveSet (which is consumed destructively by
// the interval pass afterwards).
void
RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
{
   BasicBlock *bbA = NULL, *bbB = NULL;

   if (bb->cfg.outgoingCount()) {
      // trickery to save a loop of OR'ing liveSets
      // aliasing works fine with BitSet::setOr
      for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
         if (ei.getType() == Graph::Edge::DUMMY)
            continue;
         if (bbA) {
            // two pending operands: fold them into bb->liveSet, then keep
            // accumulating into bb itself (setOr tolerates aliasing)
            bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
            bbA = bb;
         } else {
            bbA = bbB;
         }
         bbB = BasicBlock::get(ei.getNode());
      }
      bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
   } else
   if (bb->cfg.incidentCount()) {
      // no successors: nothing is live out
      bb->liveSet.fill(0);
   }
}
557
// Turn bb's live-out set into live ranges: values live at the exit get a
// range to the end of the block, and each use before that extends the range
// backwards to its defining instruction (or block entry).
bool
RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
{
   collectLiveValues(bb);

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());

   // go through out blocks and delete phi sources that do not originate from
   // the current block from the live set
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      BasicBlock *out = BasicBlock::get(ei.getNode());

      for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
         bb->liveSet.clr(i->getDef(0)->id);

         for (int s = 0; i->srcExists(s); ++s) {
            assert(i->src(s).getInsn());
            if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
               bb->liveSet.set(i->getSrc(s)->id);
            else
               bb->liveSet.clr(i->getSrc(s)->id);
         }
      }
   }

   // remaining live-outs are live until end
   if (bb->getExit()) {
      for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
         if (bb->liveSet.test(j))
            addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
   }

   // backwards scan: defs end a range, first-seen uses start one
   for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
      for (int d = 0; i->defExists(d); ++d) {
         bb->liveSet.clr(i->getDef(d)->id);
         if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
            i->getDef(d)->livei.extend(i->serial, i->serial);
      }

      for (int s = 0; i->srcExists(s); ++s) {
         if (!i->getSrc(s)->asLValue())
            continue;
         if (!bb->liveSet.test(i->getSrc(s)->id)) {
            bb->liveSet.set(i->getSrc(s)->id);
            addLiveRange(i->getSrc(s), bb, i->serial);
         }
      }
   }

   if (bb == BasicBlock::get(func->cfg.getRoot())) {
      for (std::deque<ValueDef>::iterator it = func->ins.begin();
           it != func->ins.end(); ++it) {
         if (it->get()->reg.data.id >= 0) // add hazard for fixed regs
            it->get()->livei.extend(0, 1);
      }
   }

   return true;
}
617
618
619 #define JOIN_MASK_PHI (1 << 0)
620 #define JOIN_MASK_UNION (1 << 1)
621 #define JOIN_MASK_MOV (1 << 2)
622 #define JOIN_MASK_TEX (1 << 3)
623
// Graph-colouring register allocator (Chaitin/Briggs style): coalesce,
// build the register interference graph (RIG), simplify, then select
// colours; values that cannot be coloured are handed to the spiller.
class GCRA
{
public:
   GCRA(Function *, SpillCodeInserter&);
   ~GCRA();

   // Main entry point; returns false if values had to be spilled
   // (the caller re-runs allocation after spill code insertion).
   bool allocateRegisters(ArrayList& insns);

   void printNodeInfo() const;

private:
   // One node per (representative) LValue in the interference graph.
   class RIG_Node : public Graph::Node
   {
   public:
      RIG_Node();

      void init(const RegisterSet&, LValue *);

      void addInterference(RIG_Node *);
      void addRegPreference(RIG_Node *);

      inline LValue *getValue() const
      {
         return reinterpret_cast<LValue *>(data);
      }
      inline void setValue(LValue *lval) { data = lval; }

      // Bit mask of the colours this node occupies within its 8-unit group.
      inline uint8_t getCompMask() const
      {
         return ((1 << colors) - 1) << (reg & 7);
      }

      static inline RIG_Node *get(const Graph::EdgeIterator& ei)
      {
         return static_cast<RIG_Node *>(ei.getNode());
      }

   public:
      uint32_t degree;
      uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable
      uint16_t colors;      // number of allocation units this value needs

      DataFile f;
      int32_t reg;          // assigned colour (unit index), -1 if none

      float weight;         // spill cost metric (lower = better candidate)

      // list pointers for simplify() phase
      RIG_Node *next;
      RIG_Node *prev;

      // union of the live intervals of all coalesced values (we want to retain
      // the separate intervals for testing interference of compound values)
      Interval livei;

      std::list<RIG_Node *> prefRegs;
   };

private:
   inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; }

   void buildRIG(ArrayList&);
   bool coalesce(ArrayList&);
   bool doCoalesce(ArrayList&, unsigned int mask);
   void calculateSpillWeights();
   void simplify();
   bool selectRegisters();
   void cleanup(const bool success);

   void simplifyEdge(RIG_Node *, RIG_Node *);
   void simplifyNode(RIG_Node *);

   bool coalesceValues(Value *, Value *, bool force);
   void resolveSplitsAndMerges();
   void makeCompound(Instruction *, bool isSplit);

   inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&);

   inline void insertOrderedTail(std::list<RIG_Node *>&, RIG_Node *);
   void checkList(std::list<RIG_Node *>&);

private:
   std::stack<uint32_t> stack; // simplify order, popped during selection

   // list headers for simplify() phase
   RIG_Node lo[2]; // trivially colourable nodes ([1]: values > 4 bytes)
   RIG_Node hi;    // potentially-spilling nodes

   Graph RIG;
   RIG_Node *nodes;
   unsigned int nodeCount;

   Function *func;
   Program *prog;

   // relDegree[i][j]: degree a neighbour with i colours adds to a node
   // with j colours (i rounded up to a multiple of j)
   static uint8_t relDegree[17][17];

   RegisterSet regs;

   // need to fixup register id for participants of OP_MERGE/SPLIT
   std::list<Instruction *> merges;
   std::list<Instruction *> splits;

   SpillCodeInserter& spill;
   std::list<ValuePair> mustSpill;
};
730
// Storage for the static relative-degree table (filled in GCRA's ctor).
uint8_t GCRA::relDegree[17][17];

// Nodes start self-linked so the DLLIST macros treat them as detached;
// colors == 0 marks a node as unused until init() is called on it.
GCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this)
{
   colors = 0;
}
737
// Debug dump of every initialized RIG node and its neighbours.
void
GCRA::printNodeInfo() const
{
   for (unsigned int i = 0; i < nodeCount; ++i) {
      if (!nodes[i].colors)
         continue; // never initialized / not a representative value
      INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X",
           i,
           nodes[i].f,nodes[i].reg,nodes[i].colors,
           nodes[i].weight,
           nodes[i].degree, nodes[i].degreeLimit);

      // edges may be stored in either direction, so print both
      for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next())
         INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
      for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next())
         INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
      INFO("\n");
   }
}
757
// Initialize a RIG node from its LValue: colour count from the value's
// size, a fixed colour if the value already has a register id, and a spill
// weight of infinity (never spill) until calculateSpillWeights() runs.
void
GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval)
{
   setValue(lval);
   if (lval->reg.data.id >= 0)
      lval->noSpill = lval->fixedReg = 1;

   colors = regs.units(lval->reg.file, lval->reg.size);
   f = lval->reg.file;
   reg = -1;
   if (lval->reg.data.id >= 0)
      reg = regs.idToUnits(lval);

   weight = std::numeric_limits<float>::infinity();
   degree = 0;
   degreeLimit = regs.getFileSize(f, lval->reg.size);

   livei.insert(lval->livei);
}
777
// Try to merge the equivalence classes of dst and src so they are assigned
// the same register. Unless 'force' is set, fails (returns false) on file,
// size, fixed-register or live-interval conflicts. The value with a fixed
// register id (if any) becomes the representative.
bool
GCRA::coalesceValues(Value *dst, Value *src, bool force)
{
   LValue *rep = dst->join->asLValue();
   LValue *val = src->join->asLValue();

   // prefer the side that already has a fixed register as representative
   if (!force && val->reg.data.id >= 0) {
      rep = src->join->asLValue();
      val = dst->join->asLValue();
   }
   RIG_Node *nRep = &nodes[rep->id];
   RIG_Node *nVal = &nodes[val->id];

   if (src->reg.file != dst->reg.file) {
      if (!force)
         return false;
      WARN("forced coalescing of values in different files !\n");
   }
   if (!force && dst->reg.size != src->reg.size)
      return false;

   if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) {
      if (force) {
         if (val->reg.data.id >= 0)
            WARN("forced coalescing of values in different fixed regs !\n");
      } else {
         if (val->reg.data.id >= 0)
            return false;
         // make sure that there is no overlap with the fixed register of rep
         for (ArrayList::Iterator it = func->allLValues.iterator();
              !it.end(); it.next()) {
            Value *reg = reinterpret_cast<Value *>(it.get())->asLValue();
            assert(reg);
            if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei))
               return false;
         }
      }
   }

   if (!force && nRep->livei.overlaps(nVal->livei))
      return false;

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n",
            rep->id, rep->reg.data.id, val->id);

   // set join pointer of all values joined with val
   for (Value::DefIterator def = val->defs.begin(); def != val->defs.end();
        ++def)
      (*def)->get()->join = rep;
   assert(rep->join == rep && val->join == rep);

   // add val's definitions to rep and extend the live interval of its RIG node
   rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end());
   nRep->livei.unify(nVal->livei);
   return true;
}
834
// Run the coalescing phases in order: phi joins (mandatory), then the
// chipset-specific union/tex joins, then optional copy coalescing of MOVs.
bool
GCRA::coalesce(ArrayList& insns)
{
   bool ret = doCoalesce(insns, JOIN_MASK_PHI);
   if (!ret)
      return false;
   switch (func->getProgram()->getTarget()->getChipset() & ~0xf) {
   case 0x50:
   case 0x80:
   case 0x90:
   case 0xa0:
      // pre-nvc0 also requires tex sources and defs in the same registers
      ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX);
      break;
   case 0xc0:
   case 0xd0:
   case 0xe0:
      ret = doCoalesce(insns, JOIN_MASK_UNION);
      break;
   default:
      break;
   }
   if (!ret)
      return false;
   return doCoalesce(insns, JOIN_MASK_MOV);
}
860
// Build the 8-bit component mask for a member of a compound value:
// 'size' units starting at 'base' within a compound of 'compSize' units.
// For compounds smaller than 8 units the pattern is replicated so the mask
// stays valid at any aligned position of the compound.
static inline uint8_t makeCompMask(int compSize, int base, int size)
{
   uint8_t m = ((1 << size) - 1) << base;

   if (compSize == 1)
      return 0xff;
   if (compSize == 2) {
      m |= (m << 2);
      return (m << 4) | m;
   }
   if (compSize <= 4)
      return (m << 4) | m;
   assert(compSize <= 8);
   return m;
}
879
// Used when coalescing moves. The non-compound value will become one, e.g.:
// mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 }
// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
static inline void copyCompound(Value *dst, Value *src)
{
   LValue *ldst = dst->asLValue();
   LValue *lsrc = src->asLValue();

   // NOTE(review): when dst is already compound and src is not, the copy
   // direction is reversed so the compound info flows to the plain value —
   // confirm this matches the intent at all call sites.
   if (ldst->compound && !lsrc->compound) {
      LValue *swap = lsrc;
      lsrc = ldst;
      ldst = swap;
   }

   ldst->compound = lsrc->compound;
   ldst->compMask = lsrc->compMask;
}
897
// Mark the participants of an OP_MERGE (or OP_SPLIT when 'split' is true)
// as parts of a compound value: each member gets a component mask covering
// its units at the proper offset within the representative.
void
GCRA::makeCompound(Instruction *insn, bool split)
{
   // representative: the merged def, or the source being split
   LValue *rep = (split ? insn->getSrc(0) : insn->getDef(0))->asLValue();

   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
      INFO("makeCompound(split = %i): ", split);
      insn->print();
   }

   const unsigned int size = getNode(rep)->colors;
   unsigned int base = 0;

   if (!rep->compound)
      rep->compMask = 0xff;
   rep->compound = 1;

   for (int c = 0; split ? insn->defExists(c) : insn->srcExists(c); ++c) {
      LValue *val = (split ? insn->getDef(c) : insn->getSrc(c))->asLValue();

      val->compound = 1;
      if (!val->compMask)
         val->compMask = 0xff;
      // restrict the member to its slice of the compound
      val->compMask &= makeCompMask(size, base, getNode(val)->colors);
      assert(val->compMask);

      INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n",
               rep->id, rep->compMask, val->id, val->compMask);

      base += getNode(val)->colors;
   }
   assert(base == size);
}
931
// Single coalescing sweep over all instructions; 'mask' selects which
// JOIN_MASK_* categories are processed in this sweep.
bool
GCRA::doCoalesce(ArrayList& insns, unsigned int mask)
{
   int c, n;

   for (n = 0; n < insns.getSize(); ++n) {
      Instruction *i;
      Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n));

      switch (insn->op) {
      case OP_PHI:
         if (!(mask & JOIN_MASK_PHI))
            break;
         // phi coalescing must succeed: the moves pass made sources private
         for (c = 0; insn->srcExists(c); ++c)
            if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) {
               // this is bad
               ERROR("failed to coalesce phi operands\n");
               return false;
            }
         break;
      case OP_UNION:
      case OP_MERGE:
         if (!(mask & JOIN_MASK_UNION))
            break;
         for (c = 0; insn->srcExists(c); ++c)
            coalesceValues(insn->getDef(0), insn->getSrc(c), true);
         if (insn->op == OP_MERGE) {
            merges.push_back(insn); // register ids fixed up afterwards
            if (insn->srcExists(1))
               makeCompound(insn, false);
         }
         break;
      case OP_SPLIT:
         if (!(mask & JOIN_MASK_UNION))
            break;
         splits.push_back(insn);
         for (c = 0; insn->defExists(c); ++c)
            coalesceValues(insn->getSrc(0), insn->getDef(c), true);
         makeCompound(insn, true);
         break;
      case OP_MOV:
         if (!(mask & JOIN_MASK_MOV))
            break;
         i = NULL;
         if (!insn->getDef(0)->uses.empty())
            i = insn->getDef(0)->uses.front()->getInsn();
         // if this is a contraint-move there will only be a single use
         if (i && i->op == OP_MERGE) // do we really still need this ?
            break;
         i = insn->getSrc(0)->getUniqueInsn();
         if (i && !i->constrainedDefs()) {
            // copy coalescing: join src and def, propagate compound info
            if (coalesceValues(insn->getDef(0), insn->getSrc(0), false))
               copyCompound(insn->getSrc(0), insn->getDef(0));
         }
         break;
      case OP_TEX:
      case OP_TXB:
      case OP_TXL:
      case OP_TXF:
      case OP_TXQ:
      case OP_TXD:
      case OP_TXG:
      case OP_TEXCSAA:
         if (!(mask & JOIN_MASK_TEX))
            break;
         // pre-nvc0: tex defs must land in the source registers
         for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c)
            coalesceValues(insn->getDef(c), insn->getSrc(c), true);
         break;
      default:
         break;
      }
   }
   return true;
}
1006
1007 void
1008 GCRA::RIG_Node::addInterference(RIG_Node *node)
1009 {
1010 this->degree += relDegree[node->colors][colors];
1011 node->degree += relDegree[colors][node->colors];
1012
1013 this->attach(node, Graph::Edge::CROSS);
1014 }
1015
// Remember that this node would like to share a register with 'node'
// (consulted during colour selection).
void
GCRA::RIG_Node::addRegPreference(RIG_Node *node)
{
   prefRegs.push_back(node);
}
1021
1022 GCRA::GCRA(Function *fn, SpillCodeInserter& spill) :
1023 func(fn),
1024 regs(fn->getProgram()->getTarget()),
1025 spill(spill)
1026 {
1027 prog = func->getProgram();
1028
1029 // initialize relative degrees array - i takes away from j
1030 for (int i = 1; i <= 16; ++i)
1031 for (int j = 1; j <= 16; ++j)
1032 relDegree[i][j] = j * ((i + j - 1) / j);
1033 }
1034
1035 GCRA::~GCRA()
1036 {
1037 if (nodes)
1038 delete[] nodes;
1039 }
1040
// Debug sanity check: every node in the list is a representative value and
// the list is sorted by ascending live-interval start.
void
GCRA::checkList(std::list<RIG_Node *>& lst)
{
   GCRA::RIG_Node *prev = NULL;

   for (std::list<RIG_Node *>::iterator it = lst.begin();
        it != lst.end();
        ++it) {
      assert((*it)->getValue()->join == (*it)->getValue());
      if (prev)
         assert(prev->livei.begin() <= (*it)->livei.begin());
      prev = *it;
   }
}
1055
// Insert 'node' into 'list' keeping it sorted by live-interval start,
// scanning backwards from the tail (nodes mostly arrive in order already).
void
GCRA::insertOrderedTail(std::list<RIG_Node *>& list, RIG_Node *node)
{
   if (node->livei.isEmpty())
      return;
   // only the intervals of joined values don't necessarily arrive in order
   std::list<RIG_Node *>::iterator prev, it;
   for (it = list.end(); it != list.begin(); it = prev) {
      prev = it;
      --prev;
      if ((*prev)->livei.begin() <= node->livei.begin())
         break;
   }
   list.insert(it, node);
}
1071
// Build the interference graph with a linear interval scan: walk values in
// order of interval start, retiring nodes whose interval has ended, and add
// an edge for every still-active node in the same file whose interval
// overlaps.
void
GCRA::buildRIG(ArrayList& insns)
{
   std::list<RIG_Node *> values, active;

   // function inputs are live from the start
   for (std::deque<ValueDef>::iterator it = func->ins.begin();
        it != func->ins.end(); ++it)
      insertOrderedTail(values, getNode(it->get()->asLValue()));

   // one node per representative def, ordered by interval start
   for (int i = 0; i < insns.getSize(); ++i) {
      Instruction *insn = reinterpret_cast<Instruction *>(insns.get(i));
      for (int d = 0; insn->defExists(d); ++d)
         if (insn->getDef(d)->rep() == insn->getDef(d))
            insertOrderedTail(values, getNode(insn->getDef(d)->asLValue()));
   }
   checkList(values);

   while (!values.empty()) {
      RIG_Node *cur = values.front();

      for (std::list<RIG_Node *>::iterator it = active.begin();
           it != active.end();) {
         RIG_Node *node = *it;

         if (node->livei.end() <= cur->livei.begin()) {
            it = active.erase(it); // interval over, retire node
         } else {
            if (node->f == cur->f && node->livei.overlaps(cur->livei))
               cur->addInterference(node);
            ++it;
         }
      }
      values.pop_front();
      active.push_back(cur);
   }
}
1108
// Compute a spill cost for every node (refcount^2 / interval length; nodes
// marked noSpill keep infinite weight) and distribute nodes onto the
// simplify work lists: lo[] for trivially colourable nodes, hi for
// potential spill candidates. Fixed-register nodes only update the
// register set's high-water mark.
void
GCRA::calculateSpillWeights()
{
   for (unsigned int i = 0; i < nodeCount; ++i) {
      RIG_Node *const n = &nodes[i];
      if (!nodes[i].colors || nodes[i].livei.isEmpty())
         continue;
      if (nodes[i].reg >= 0) {
         // update max reg
         regs.occupy(n->f, n->reg, n->colors);
         continue;
      }
      LValue *val = nodes[i].getValue();

      if (!val->noSpill) {
         int rc = 0;
         for (Value::DefIterator it = val->defs.begin();
              it != val->defs.end();
              ++it)
            rc += (*it)->get()->refCount();

         nodes[i].weight =
            (float)rc * (float)rc / (float)nodes[i].livei.extent();
      }

      if (nodes[i].degree < nodes[i].degreeLimit) {
         int l = 0;
         if (val->reg.size > 4)
            l = 1; // wide values go on their own list (simplified later)
         DLLIST_ADDHEAD(&lo[l], &nodes[i]);
      } else {
         DLLIST_ADDHEAD(&hi, &nodes[i]);
      }
   }
   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
      printNodeInfo();
}
1146
// Removing node 'a' from the graph lowers neighbour 'b's degree; if that
// makes b trivially colourable, move it from the hi list to the proper lo
// list.
void
GCRA::simplifyEdge(RIG_Node *a, RIG_Node *b)
{
   // remember whether b was above its limit before the decrement
   bool move = b->degree >= b->degreeLimit;

   INFO_DBG(prog->dbgFlags, REG_ALLOC,
            "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n",
            a->getValue()->id, a->degree, a->degreeLimit,
            b->getValue()->id, b->degree, b->degreeLimit);

   b->degree -= relDegree[a->colors][b->colors];

   move = move && b->degree < b->degreeLimit;
   if (move && !DLLIST_EMPTY(b)) {
      int l = (b->getValue()->reg.size > 4) ? 1 : 0;
      DLLIST_DEL(b);
      DLLIST_ADDTAIL(&lo[l], b);
   }
}
1166
// Remove a node from the RIG, updating the degrees of all its neighbours,
// and push its value id onto the stack for the select (coloring) phase.
void
GCRA::simplifyNode(RIG_Node *node)
{
   for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
      simplifyEdge(node, RIG_Node::get(ei));

   for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
      simplifyEdge(node, RIG_Node::get(ei));

   DLLIST_DEL(node);
   stack.push(node->getValue()->id);

   // a node pushed while still constrained is an optimistic spill candidate
   INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n",
            node->getValue()->id,
            (node->degree < node->degreeLimit) ? "" : "(spill)");
}
1183
// Simplify phase of graph coloring: repeatedly remove nodes from the RIG,
// preferring trivially colorable ones; when only constrained nodes remain,
// optimistically push the cheapest spill candidate.
void
GCRA::simplify()
{
   for (;;) {
      if (!DLLIST_EMPTY(&lo[0])) {
         // drain single-register nodes first
         do {
            simplifyNode(lo[0].next);
         } while (!DLLIST_EMPTY(&lo[0]));
      } else
      if (!DLLIST_EMPTY(&lo[1])) {
         // wide nodes one at a time: removing one may make single-register
         // nodes trivially colorable again
         simplifyNode(lo[1].next);
      } else
      if (!DLLIST_EMPTY(&hi)) {
         RIG_Node *best = hi.next;
         float bestScore = best->weight / (float)best->degree;
         // spill candidate
         for (RIG_Node *it = best->next; it != &hi; it = it->next) {
            float score = it->weight / (float)it->degree;
            if (score < bestScore) {
               best = it;
               bestScore = score;
            }
         }
         // an infinite score means the weight of every remaining candidate
         // was never computed (e.g. noSpill values) — no way to proceed
         if (isinf(bestScore)) {
            ERROR("no viable spill candidates left\n");
            break;
         }
         simplifyNode(best);
      } else {
         break;
      }
   }
}
1217
// Mark the registers used by a neighbour (intf) of node as occupied.
// For compound values (register tuples), only the parts that actually
// overlap in time (live ranges) and space (compMask) are blocked.
void
GCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei)
{
   const RIG_Node *intf = RIG_Node::get(ei);

   // neighbour has no register assigned yet, nothing to block
   if (intf->reg < 0)
      return;
   const LValue *vA = node->getValue();
   const LValue *vB = intf->getValue();

   // mask of the register units intf occupies within its aligned
   // 8-unit group
   const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7);

   if (vA->compound | vB->compound) {
      // NOTE: this only works for >aligned< register tuples !
      for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) {
         for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) {
            const LValue *vD = (*D)->get()->asLValue();
            const LValue *vd = (*d)->get()->asLValue();

            // defs that are never live simultaneously don't interfere
            if (!vD->livei.overlaps(vd->livei)) {
               INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n",
                        vD->id, vd->id);
               continue;
            }

            // restrict to the parts of the tuple either def occupies
            uint8_t mask = vD->compound ? vD->compMask : ~0;
            if (vd->compound) {
               assert(vB->compound);
               mask &= vd->compMask & vB->compMask;
            } else {
               mask &= intfMask;
            }

            INFO_DBG(prog->dbgFlags, REG_ALLOC,
                     "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n",
                     vD->id,
                     vD->compound ? vD->compMask : 0xff,
                     vd->id,
                     vd->compound ? vd->compMask : intfMask,
                     vB->compMask, intf->reg & ~7, mask);
            if (mask)
               regs.occupyMask(node->f, intf->reg & ~7, mask);
         }
      }
   } else {
      // simple case: block intf's full register range
      INFO_DBG(prog->dbgFlags, REG_ALLOC,
               "(%%%i) X (%%%i): $r%i + %u\n",
               vA->id, vB->id, intf->reg, intf->colors);
      regs.occupy(node->f, intf->reg, intf->colors);
   }
}
1269
// Select phase of graph coloring: pop nodes from the simplify stack and try
// to assign each a register not blocked by already-colored neighbours.
// Returns false if any node could not be colored; those values are queued
// in mustSpill (with a memory slot for GPRs) for the spill pass.
bool
GCRA::selectRegisters()
{
   INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n");

   while (!stack.empty()) {
      RIG_Node *node = &nodes[stack.top()];
      stack.pop();

      // start from a clean register file for this node
      regs.reset(node->f);

      INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n",
               node->getValue()->id, node->colors);

      // block out the registers of all already-colored neighbours
      for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
         checkInterference(node, ei);
      for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
         checkInterference(node, ei);

      // try preferred registers first (e.g. to make moves coalesceable)
      if (!node->prefRegs.empty()) {
         for (std::list<RIG_Node *>::const_iterator it = node->prefRegs.begin();
              it != node->prefRegs.end();
              ++it) {
            if ((*it)->reg >= 0 &&
                regs.testOccupy(node->f, (*it)->reg, node->colors)) {
               node->reg = (*it)->reg;
               break;
            }
         }
      }
      if (node->reg >= 0)
         continue;
      LValue *lval = node->getValue();
      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
         regs.print();
      bool ret = regs.assign(node->reg, node->f, node->colors);
      if (ret) {
         INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg);
         lval->compMask = node->getCompMask();
      } else {
         INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n",
                  lval->id, lval->reg.size);
         Symbol *slot = NULL;
         // only GPRs get a memory slot; other files are respilled to GPRs
         if (lval->reg.file == FILE_GPR)
            slot = spill.assignSlot(node->livei, lval->reg.size);
         mustSpill.push_back(ValuePair(lval, slot));
      }
   }
   if (!mustSpill.empty())
      return false;
   // commit the chosen register units to the values themselves
   for (unsigned int i = 0; i < nodeCount; ++i) {
      LValue *lval = nodes[i].getValue();
      if (nodes[i].reg >= 0 && nodes[i].colors > 0)
         lval->reg.data.id =
            regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size);
   }
   return true;
}
1328
// Top-level graph-coloring driver for one round of register allocation:
// create RIG nodes for all values, coalesce, build the interference graph,
// simplify, then select registers. On failure the uncolorable values are
// handed to the spill code inserter and false is returned so the caller can
// retry with the rewritten program.
bool
GCRA::allocateRegisters(ArrayList& insns)
{
   bool ret;

   INFO_DBG(prog->dbgFlags, REG_ALLOC,
            "allocateRegisters to %u instructions\n", insns.getSize());

   nodeCount = func->allLValues.getSize();
   nodes = new RIG_Node[nodeCount];
   // NOTE(review): plain operator new throws std::bad_alloc instead of
   // returning NULL, so this check looks dead — confirm whether a nothrow
   // allocator is in play
   if (!nodes)
      return false;
   for (unsigned int i = 0; i < nodeCount; ++i) {
      LValue *lval = reinterpret_cast<LValue *>(func->allLValues.get(i));
      if (lval) {
         nodes[i].init(regs, lval);
         RIG.insert(&nodes[i]);
      }
   }

   // coalesce first, we use only 1 RIG node for a group of joined values
   ret = coalesce(insns);
   if (!ret)
      goto out;

   if (func->getProgram()->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
      func->printLiveIntervals();

   buildRIG(insns);
   calculateSpillWeights();
   simplify();

   ret = selectRegisters();
   if (!ret) {
      // coloring failed: reset the register file and insert spill code for
      // the values recorded in mustSpill
      INFO_DBG(prog->dbgFlags, REG_ALLOC,
               "selectRegisters failed, inserting spill code ...\n");
      regs.reset(FILE_GPR, true);
      spill.run(mustSpill);
      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
         func->print();
   } else {
      prog->maxGPR = std::max(prog->maxGPR, regs.getMaxAssigned(FILE_GPR));
   }

out:
   cleanup(ret);
   return ret;
}
1377
// Undo per-round RA bookkeeping. On success, propagate the chosen register
// ids from join representatives to their members and fix up split/merge
// results; on failure, dissolve the coalesced equivalence classes so the
// next attempt starts from a clean state.
void
GCRA::cleanup(const bool success)
{
   mustSpill.clear();

   for (ArrayList::Iterator it = func->allLValues.iterator();
        !it.end(); it.next()) {
      LValue *lval = reinterpret_cast<LValue *>(it.get());

      lval->livei.clear();

      lval->compound = 0;
      lval->compMask = 0;

      // representatives (join == self) need no further treatment
      if (lval->join == lval)
         continue;

      if (success) {
         // member values inherit the register of their representative
         lval->reg.data.id = lval->join->reg.data.id;
      } else {
         // detach this value's defs from the representative and make the
         // value its own representative again
         for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
              ++d)
            lval->join->defs.remove(*d);
         lval->join = lval;
      }
   }

   if (success)
      resolveSplitsAndMerges();
   splits.clear(); // avoid duplicate entries on next coalesce pass
   merges.clear();

   delete[] nodes;
   nodes = NULL;
}
1413
// Find (or create) a local-memory spill slot of the given size whose
// current occupancy does not overlap 'livei'. Existing slots are reused
// where possible; otherwise the spill stack is grown.
Symbol *
SpillCodeInserter::assignSlot(const Interval &livei, const unsigned int size)
{
   SpillSlot slot;
   int32_t offsetBase = stackSize;
   int32_t offset;
   std::list<SpillSlot>::iterator pos = slots.end(), it = slots.begin();

   // align the search start to the slot size
   if (offsetBase % size)
      offsetBase += size - (offsetBase % size);

   slot.sym = NULL;

   // scan existing slots for a window [offset, offset + size) that is
   // unoccupied during livei
   for (offset = offsetBase; offset < stackSize; offset += size) {
      const int32_t entryEnd = offset + size;
      while (it != slots.end() && it->offset < offset)
         ++it;
      if (it == slots.end()) // no slots left
         break;
      std::list<SpillSlot>::iterator bgn = it;

      while (it != slots.end() && it->offset < entryEnd) {
         it->occup.print(); // NOTE(review): unconditional debug output — confirm intended
         if (it->occup.overlaps(livei))
            break;
         ++it;
      }
      if (it == slots.end() || it->offset >= entryEnd) {
         // fits
         for (; bgn != slots.end() && bgn->offset < entryEnd; ++bgn) {
            bgn->occup.insert(livei);
            if (bgn->size() == size)
               slot.sym = bgn->sym;
         }
         break;
      }
   }
   if (!slot.sym) {
      // no reusable slot found: allocate a new one at the end of the stack
      stackSize = offset + size;
      slot.offset = offset;
      slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL);
      // without a stack pointer, spill space is addressed relative to the
      // thread-local storage base
      if (!func->stackPtr)
         offset += func->tlsBase;
      slot.sym->setAddress(NULL, offset);
      slot.sym->reg.size = size;
      slots.insert(pos, slot)->occup.insert(livei);
   }
   return slot.sym;
}
1463
1464 void
1465 SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
1466 {
1467 const DataType ty = typeOfSize(slot->reg.size);
1468
1469 Instruction *st;
1470 if (slot->reg.file == FILE_MEMORY_LOCAL) {
1471 st = new_Instruction(func, OP_STORE, ty);
1472 st->setSrc(0, slot);
1473 st->setSrc(1, lval);
1474 lval->noSpill = 1;
1475 } else {
1476 st = new_Instruction(func, OP_CVT, ty);
1477 st->setDef(0, slot);
1478 st->setSrc(0, lval);
1479 }
1480 defi->bb->insertAfter(defi, st);
1481 }
1482
1483 LValue *
1484 SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
1485 {
1486 const DataType ty = typeOfSize(slot->reg.size);
1487
1488 lval = cloneShallow(func, lval);
1489
1490 Instruction *ld;
1491 if (slot->reg.file == FILE_MEMORY_LOCAL) {
1492 lval->noSpill = 1;
1493 ld = new_Instruction(func, OP_LOAD, ty);
1494 } else {
1495 ld = new_Instruction(func, OP_CVT, ty);
1496 }
1497 ld->setDef(0, lval);
1498 ld->setSrc(0, slot);
1499
1500 usei->bb->insertBefore(usei, ld);
1501 return lval;
1502 }
1503
1504 bool
1505 SpillCodeInserter::run(const std::list<ValuePair>& lst)
1506 {
1507 for (std::list<ValuePair>::const_iterator it = lst.begin(); it != lst.end();
1508 ++it) {
1509 LValue *lval = it->first->asLValue();
1510 Symbol *mem = it->second ? it->second->asSym() : NULL;
1511
1512 for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
1513 ++d) {
1514 Value *slot = mem ?
1515 static_cast<Value *>(mem) : new_LValue(func, FILE_GPR);
1516 Value *tmp = NULL;
1517 Instruction *last = NULL;
1518
1519 LValue *dval = (*d)->get()->asLValue();
1520 Instruction *defi = (*d)->getInsn();
1521
1522 // handle uses first or they'll contain the spill stores
1523 while (!dval->uses.empty()) {
1524 ValueRef *u = dval->uses.front();
1525 Instruction *usei = u->getInsn();
1526 assert(usei);
1527 if (usei->op == OP_PHI) {
1528 tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot;
1529 last = NULL;
1530 } else
1531 if (!last || usei != last->next) { // TODO: sort uses
1532 tmp = unspill(usei, dval, slot);
1533 last = usei;
1534 }
1535 u->set(tmp);
1536 }
1537
1538 assert(defi);
1539 if (defi->op == OP_PHI) {
1540 d = lval->defs.erase(d);
1541 --d;
1542 if (slot->reg.file == FILE_MEMORY_LOCAL)
1543 delete_Instruction(func->getProgram(), defi);
1544 else
1545 defi->setDef(0, slot);
1546 } else {
1547 spill(defi, slot, dval);
1548 }
1549 }
1550
1551 }
1552
1553 // TODO: We're not trying to reuse old slots in a potential next iteration.
1554 // We have to update the slots' livei intervals to be able to do that.
1555 stackBase = stackSize;
1556 slots.clear();
1557 return true;
1558 }
1559
1560 bool
1561 RegAlloc::exec()
1562 {
1563 for (IteratorRef it = prog->calls.iteratorDFS(false);
1564 !it->end(); it->next()) {
1565 func = Function::get(reinterpret_cast<Graph::Node *>(it->get()));
1566
1567 func->tlsBase = prog->tlsSize;
1568 if (!execFunc())
1569 return false;
1570 prog->tlsSize += func->tlsSize;
1571 }
1572 return true;
1573 }
1574
// Allocate registers for a single function: insert constraint, phi and
// argument moves once, then iterate (up to 3 times) rebuilding live sets
// and intervals and running the graph-coloring allocator, with spill code
// inserted after each failed attempt.
bool
RegAlloc::execFunc()
{
   InsertConstraintsPass insertConstr;
   PhiMovesPass insertPhiMoves;
   ArgumentMovesPass insertArgMoves;
   BuildIntervalsPass buildIntervals;
   SpillCodeInserter insertSpills(func);

   GCRA gcra(func, insertSpills);

   unsigned int i, retries;
   bool ret;

   ret = insertConstr.exec(func);
   if (!ret)
      goto out;

   ret = insertPhiMoves.run(func);
   if (!ret)
      goto out;

   ret = insertArgMoves.run(func);
   if (!ret)
      goto out;

   // TODO: need to fix up spill slot usage ranges to support > 1 retry
   for (retries = 0; retries < 3; ++retries) {
      if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC))
         INFO("Retry: %i\n", retries);
      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
         func->print();

      // spilling to registers may add live ranges, need to rebuild everything
      ret = true;
      // one extra live-set pass per loop nesting level
      for (sequence = func->cfg.nextSequence(), i = 0;
           ret && i <= func->loopNestingBound;
           sequence = func->cfg.nextSequence(), ++i)
         ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
      if (!ret)
         break;
      func->orderInstructions(this->insns);

      ret = buildIntervals.run(func);
      if (!ret)
         break;
      ret = gcra.allocateRegisters(insns);
      if (ret)
         break; // success
   }
   INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret);

   // the function's TLS demand is whatever the spill pass ended up using
   func->tlsSize = insertSpills.getStackSize();
out:
   return ret;
}
1631
1632 // TODO: check if modifying Instruction::join here breaks anything
1633 void
1634 GCRA::resolveSplitsAndMerges()
1635 {
1636 for (std::list<Instruction *>::iterator it = splits.begin();
1637 it != splits.end();
1638 ++it) {
1639 Instruction *split = *it;
1640 unsigned int reg = regs.idToBytes(split->getSrc(0));
1641 for (int d = 0; split->defExists(d); ++d) {
1642 Value *v = split->getDef(d);
1643 v->reg.data.id = regs.bytesToId(v, reg);
1644 v->join = v;
1645 reg += v->reg.size;
1646 }
1647 }
1648 splits.clear();
1649
1650 for (std::list<Instruction *>::iterator it = merges.begin();
1651 it != merges.end();
1652 ++it) {
1653 Instruction *merge = *it;
1654 unsigned int reg = regs.idToBytes(merge->getDef(0));
1655 for (int s = 0; merge->srcExists(s); ++s) {
1656 Value *v = merge->getSrc(s);
1657 v->reg.data.id = regs.bytesToId(v, reg);
1658 v->join = v;
1659 reg += v->reg.size;
1660 }
1661 }
1662 merges.clear();
1663 }
1664
1665 bool Program::registerAllocation()
1666 {
1667 RegAlloc ra(this);
1668 return ra.exec();
1669 }
1670
1671 bool
1672 RegAlloc::InsertConstraintsPass::exec(Function *ir)
1673 {
1674 constrList.clear();
1675
1676 bool ret = run(ir, true, true);
1677 if (ret)
1678 ret = insertConstraintMoves();
1679 return ret;
1680 }
1681
// TODO: make part of texture insn
// Shrink a texture instruction's write mask by dropping components whose
// results are never referenced, and compact the remaining defs to the front.
void
RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
{
   Value *def[4];
   int c, k, d;
   uint8_t mask = 0;

   // c indexes mask components, k the existing defs, d the surviving ones
   for (d = 0, k = 0, c = 0; c < 4; ++c) {
      if (!(tex->tex.mask & (1 << c)))
         continue;
      if (tex->getDef(k)->refCount()) {
         mask |= 1 << c;
         def[d++] = tex->getDef(k);
      }
      ++k;
   }
   tex->tex.mask = mask;

   // compact the used defs to the front, clear the rest
   for (c = 0; c < d; ++c)
      tex->setDef(c, def[c]);
   for (; c < 4; ++c)
      tex->setDef(c, NULL);
}
1706
1707 bool
1708 RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
1709 {
1710 Value *v = cst->getSrc(s);
1711
1712 // current register allocation can't handle it if a value participates in
1713 // multiple constraints
1714 for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) {
1715 if (cst != (*it)->getInsn())
1716 return true;
1717 }
1718
1719 // can start at s + 1 because detectConflict is called on all sources
1720 for (int c = s + 1; cst->srcExists(c); ++c)
1721 if (v == cst->getSrc(c))
1722 return true;
1723
1724 Instruction *defi = v->getInsn();
1725
1726 return (!defi || defi->constrainedDefs());
1727 }
1728
// Turn sources s..s+n-1 of i into defs of an OP_CONSTRAINT instruction so
// they are forced into consecutive registers. An existing identical
// constraint op from a dominating block is reused if available.
void
RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
{
   Instruction *cst;
   int d;

   // first, look for an existing identical constraint op
   for (std::list<Instruction *>::iterator it = constrList.begin();
        it != constrList.end();
        ++it) {
      cst = (*it);
      // a candidate is only usable if its defs reach i
      if (!i->bb->dominatedBy(cst->bb))
         break;
      for (d = 0; d < n; ++d)
         if (cst->getSrc(d) != i->getSrc(d + s))
            break;
      if (d >= n) {
         // full match: just reuse its defs as our sources
         for (d = 0; d < n; ++d, ++s)
            i->setSrc(s, cst->getDef(d));
         return;
      }
   }
   cst = new_Instruction(func, OP_CONSTRAINT, i->dType);

   // route each source through a fresh def of the constraint op
   for (d = 0; d < n; ++s, ++d) {
      cst->setDef(d, new_LValue(func, FILE_GPR));
      cst->setSrc(d, i->getSrc(s));
      i->setSrc(s, cst->getDef(d));
   }
   i->bb->insertBefore(i, cst);

   constrList.push_back(cst);
}
1762
1763 // Add a dummy use of the pointer source of >= 8 byte loads after the load
1764 // to prevent it from being assigned a register which overlapping the load's
1765 // destination, which would produce random corruptions.
1766 void
1767 RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
1768 {
1769 Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
1770 hzd->setSrc(0, src->get());
1771 i->bb->insertAfter(i, hzd);
1772
1773 }
1774
// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q
// Replace the leading GPR defs of insn by a single wide value that an
// inserted OP_SPLIT decomposes again, so RA assigns one contiguous,
// aligned register range.
void
RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn)
{
   uint8_t size = 0;
   int n;
   // count the leading GPR defs and sum up their sizes
   for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n)
      size += insn->getDef(n)->reg.size;
   // nothing to condense for fewer than 2 defs
   if (n < 2)
      return;
   LValue *lval = new_LValue(func, FILE_GPR);
   lval->reg.size = size;

   // insn now defines the wide value; the split redistributes it to the
   // original defs
   Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size));
   split->setSrc(0, lval);
   for (int d = 0; d < n; ++d) {
      split->setDef(d, insn->getDef(d));
      insn->setDef(d, NULL);
   }
   insn->setDef(0, lval);

   // move any remaining (non-GPR) defs up to slot 1 and onwards
   for (int k = 1, d = n; insn->defExists(d); ++d, ++k) {
      insn->setDef(k, insn->getDef(d));
      insn->setDef(d, NULL);
   }
   // carry over predicate if any (mainly for OP_UNION uses)
   split->setPredicate(insn->cc, insn->getPredicate());

   insn->bb->insertAfter(insn, split);
   constrList.push_back(split);
}
// Merge sources a..b of insn into a single wide value produced by an
// OP_MERGE inserted before insn, forcing them into consecutive registers.
void
RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
                                              const int a, const int b)
{
   uint8_t size = 0;
   if (a >= b)
      return;
   for (int s = a; s <= b; ++s)
      size += insn->getSrc(s)->reg.size;
   if (!size)
      return;
   // the wide replacement value
   LValue *lval = new_LValue(func, FILE_GPR);
   lval->reg.size = size;

   // detach the extra (e.g. predicate) sources so they don't get shifted
   Value *save[3];
   insn->takeExtraSources(0, save);

   Instruction *merge = new_Instruction(func, OP_MERGE, typeOfSize(size));
   merge->setDef(0, lval);
   for (int s = a, i = 0; s <= b; ++s, ++i) {
      merge->setSrc(i, insn->getSrc(s));
      insn->setSrc(s, NULL);
   }
   insn->setSrc(a, lval);

   // close the gap left by the merged sources
   for (int k = a + 1, s = b + 1; insn->srcExists(s); ++s, ++k) {
      insn->setSrc(k, insn->getSrc(s));
      insn->setSrc(s, NULL);
   }
   insn->bb->insertBefore(insn, merge);

   insn->putExtraSources(0, save);

   constrList.push_back(merge);
}
1841
1842 void
1843 RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
1844 {
1845 textureMask(tex);
1846 condenseDefs(tex);
1847
1848 int n = tex->srcCount(0xff, true);
1849 if (n > 4) {
1850 condenseSrcs(tex, 0, 3);
1851 if (n > 5) // NOTE: first call modified positions already
1852 condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
1853 } else
1854 if (n > 1) {
1855 condenseSrcs(tex, 0, n - 1);
1856 }
1857 }
1858
// Condense the sources of an NVC0 texture instruction into two register
// tuples: s sources in the first, n in the second.
void
RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex)
{
   int n, s;

   textureMask(tex);

   if (tex->op == OP_TXQ) {
      // TXQ: everything goes into a single tuple
      s = tex->srcCount(0xff);
      n = 0;
   } else {
      s = tex->tex.target.getArgCount();
      // indirect resource/sampler handles ride along in the first tuple
      if (!tex->tex.target.isArray() &&
          (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
         ++s;
      if (tex->op == OP_TXD && tex->tex.useOffsets)
         ++s;
      n = tex->srcCount(0xff) - s;
      assert(n <= 4);
   }

   if (s > 1)
      condenseSrcs(tex, 0, s - 1);
   if (n > 1) // NOTE: first call modified positions already
      condenseSrcs(tex, 1, n);

   condenseDefs(tex);
}
1887
// NV50-style texturing: pad the instruction so that the number of sources
// and defs match, then condense each side into one contiguous tuple.
void
RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
{
   // temporarily remove the predicate so it isn't condensed along with the
   // regular sources
   Value *pred = tex->getPredicate();
   if (pred)
      tex->setPredicate(tex->cc, NULL);

   textureMask(tex);

   assert(tex->defExists(0) && tex->srcExists(0));
   // make src and def count match
   int c;
   for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) {
      if (!tex->srcExists(c))
         tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue()));
      if (!tex->defExists(c))
         tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue()));
   }
   if (pred)
      tex->setPredicate(tex->cc, pred);
   condenseDefs(tex);
   condenseSrcs(tex, 0, c - 1);
}
1911
// Insert constraint markers for instructions whose multiple sources must be
// located in consecutive registers.
bool
RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
{
   TexInstruction *tex;
   Instruction *next;
   int s, size;

   targ = bb->getProgram()->getTarget();

   // instructions may be modified/extended below, so remember the
   // successor up front
   for (Instruction *i = bb->getEntry(); i; i = next) {
      next = i->next;

      if ((tex = i->asTex())) {
         // dispatch on chipset generation
         switch (targ->getChipset() & ~0xf) {
         case 0x50:
         case 0x80:
         case 0x90:
         case 0xa0:
            texConstraintNV50(tex);
            break;
         case 0xc0:
         case 0xd0:
            texConstraintNVC0(tex);
            break;
         case 0xe0:
         case NVISA_GK110_CHIPSET:
            texConstraintNVE0(tex);
            break;
         default:
            break;
         }
      } else
      if (i->op == OP_EXPORT || i->op == OP_STORE) {
         // the stored data (sources from 1 onwards) must be contiguous
         for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
            assert(i->srcExists(s));
            size -= i->getSrc(s)->reg.size;
         }
         condenseSrcs(i, 1, s - 1);
      } else
      if (i->op == OP_LOAD || i->op == OP_VFETCH) {
         // wide loads write a contiguous range of defs
         condenseDefs(i);
         if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
            addHazard(i, i->src(0).getIndirect(0));
      } else
      if (i->op == OP_UNION) {
         constrList.push_back(i);
      }
   }
   return true;
}
1964
// Insert extra moves so that, if multiple register constraints on a value are
// in conflict, these conflicts can be resolved.
bool
RegAlloc::InsertConstraintsPass::insertConstraintMoves()
{
   for (std::list<Instruction *>::iterator it = constrList.begin();
        it != constrList.end();
        ++it) {
      Instruction *cst = *it;
      Instruction *mov;

      // NOTE: the "&& 0" deliberately disables this branch
      if (cst->op == OP_SPLIT && 0) {
         // spilling splits is annoying, just make sure they're separate
         for (int d = 0; cst->defExists(d); ++d) {
            if (!cst->getDef(d)->refCount())
               continue;
            LValue *lval = new_LValue(func, cst->def(d).getFile());
            const uint8_t size = cst->def(d).getSize();
            lval->reg.size = size;

            // route the split def through a fresh MOV
            mov = new_Instruction(func, OP_MOV, typeOfSize(size));
            mov->setSrc(0, lval);
            mov->setDef(0, cst->getDef(d));
            cst->setDef(d, mov->getSrc(0));
            cst->bb->insertAfter(cst, mov);

            cst->getSrc(0)->asLValue()->noSpill = 1;
            mov->getSrc(0)->asLValue()->noSpill = 1;
         }
      } else
      if (cst->op == OP_MERGE || cst->op == OP_UNION) {
         for (int s = 0; cst->srcExists(s); ++s) {
            const uint8_t size = cst->src(s).getSize();

            // undefined source: give it a dummy def so RA has something
            // to work with
            if (!cst->getSrc(s)->defs.size()) {
               mov = new_Instruction(func, OP_NOP, typeOfSize(size));
               mov->setDef(0, cst->getSrc(s));
               cst->bb->insertBefore(cst, mov);
               continue;
            }
            assert(cst->getSrc(s)->defs.size() == 1); // still SSA

            Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
            // catch some cases where don't really need MOVs
            if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs())
               continue;

            LValue *lval = new_LValue(func, cst->src(s).getFile());
            lval->reg.size = size;

            // route the source through a fresh MOV
            mov = new_Instruction(func, OP_MOV, typeOfSize(size));
            mov->setDef(0, lval);
            mov->setSrc(0, cst->getSrc(s));
            cst->setSrc(s, mov->getDef(0));
            cst->bb->insertBefore(cst, mov);

            cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help

            // UNION sources are written conditionally, so the MOV must
            // carry the same predicate
            if (cst->op == OP_UNION)
               mov->setPredicate(defi->cc, defi->getPredicate());
         }
      }
   }

   return true;
}
2031
2032 } // namespace nv50_ir