nv50/ir/tgsi: translate SNE as unordered comparison
[mesa.git] / src / gallium / drivers / nv50 / codegen / nv50_ir_ra.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir.h"
24 #include "nv50_ir_target.h"
25
26 #include "nv50/nv50_debug.h"
27
28 namespace nv50_ir {
29
30 #define MAX_REGISTER_FILE_SIZE 256
31
// Tracks which registers of each register file are currently occupied and
// hands out free spans; used by RegAlloc to bind values to physical ids.
class RegisterSet
{
public:
   RegisterSet();
   RegisterSet(const Target *);

   void init(const Target *);
   void reset(); // reset allocation status, but not max assigned regs

   void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
   void intersect(DataFile f, const RegisterSet *);

   bool assign(Value **, int nr);
   void release(const Value *);
   void occupy(const Value *);

   // highest register index ever assigned/occupied in file @f (-1 if none)
   int getMaxAssigned(DataFile f) const { return fill[f]; }

   void print() const;

private:
   // one bit per allocation unit; set = occupied
   uint32_t bits[FILE_ADDRESS + 1][(MAX_REGISTER_FILE_SIZE + 31) / 32];

   int unit[FILE_ADDRESS + 1]; // log2 of allocation granularity

   int last[FILE_ADDRESS + 1]; // index of the last register of each file
   int fill[FILE_ADDRESS + 1]; // high-water mark of assigned registers
};
60
61 void
62 RegisterSet::reset()
63 {
64 memset(bits, 0, sizeof(bits));
65 }
66
67 RegisterSet::RegisterSet()
68 {
69 reset();
70 }
71
72 void
73 RegisterSet::init(const Target *targ)
74 {
75 for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
76 DataFile f = static_cast<DataFile>(rf);
77 last[rf] = targ->getFileSize(f) - 1;
78 unit[rf] = targ->getFileUnit(f);
79 fill[rf] = -1;
80 assert(last[rf] < MAX_REGISTER_FILE_SIZE);
81 }
82 }
83
84 RegisterSet::RegisterSet(const Target *targ)
85 {
86 reset();
87 init(targ);
88 }
89
90 void
91 RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
92 {
93 for (int i = 0; i < (last[f] + 31) / 32; ++i)
94 bits[f][i] = (bits[f][i] | lock) & ~unlock;
95 }
96
97 void
98 RegisterSet::intersect(DataFile f, const RegisterSet *set)
99 {
100 for (int i = 0; i < (last[f] + 31) / 32; ++i)
101 bits[f][i] |= set->bits[f][i];
102 }
103
104 void
105 RegisterSet::print() const
106 {
107 INFO("GPR:");
108 for (int i = 0; i < (last[FILE_GPR] + 31) / 32; ++i)
109 INFO(" %08x", bits[FILE_GPR][i]);
110 INFO("\n");
111 }
112
// Find a free, aligned span of registers for the @nr coupled values in
// @def (all of the same size and file) and mark it occupied; consecutive
// ids are written back to the values. Returns false if the file is full.
bool
RegisterSet::assign(Value **def, int nr)
{
   DataFile f = def[0]->reg.file;
   int n = nr;
   if (n == 3)
      n = 4; // 3-element vectors are allocated with 4-element alignment
   // span size in allocation units of file f
   int s = (n * def[0]->reg.size) >> unit[f];
   uint32_t m = (1 << s) - 1; // bit mask covering the span

   int id = last[f] + 1; // sentinel: out of range unless a slot is found
   int i;

   // scan each 32-unit word for an s-aligned clear span
   for (i = 0; (i * 32) < last[f]; ++i) {
      if (bits[f][i] == 0xffffffff)
         continue;

      for (id = 0; id < 32; id += s)
         if (!(bits[f][i] & (m << id)))
            break;
      if (id < 32)
         break;
   }
   id += i * 32;
   if (id > last[f])
      return false;

   bits[f][id / 32] |= m << (id % 32);

   // raise the high-water mark to the last unit of the span
   if (id + (s - 1) > fill[f])
      fill[f] = id + (s - 1);

   for (i = 0; i < nr; ++i, ++id)
      if (!def[i]->livei.isEmpty()) // XXX: really increased id if empty ?
         def[i]->reg.data.id = id;
   return true;
}
150
151 void
152 RegisterSet::occupy(const Value *val)
153 {
154 int id = val->reg.data.id;
155 if (id < 0)
156 return;
157 unsigned int f = val->reg.file;
158
159 uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
160
161 INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %x\n", f, id, m);
162
163 bits[f][id / 32] |= m << (id % 32);
164
165 if (fill[f] < id)
166 fill[f] = id;
167 }
168
169 void
170 RegisterSet::release(const Value *val)
171 {
172 int id = val->reg.data.id;
173 if (id < 0)
174 return;
175 unsigned int f = val->reg.file;
176
177 uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
178
179 INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %x\n", f, id, m);
180
181 bits[f][id / 32] &= ~(m << (id % 32));
182 }
183
// Bit masks selecting which instruction kinds coalesceValues() may join.
#define JOIN_MASK_PHI (1 << 0)
#define JOIN_MASK_UNION (1 << 1)
#define JOIN_MASK_MOV (1 << 2)
#define JOIN_MASK_TEX (1 << 3)
#define JOIN_MASK_CONSTRAINT (1 << 4)
189
// Top-level register allocator: inserts phi/constraint moves, builds live
// intervals, coalesces compatible values and assigns registers with a
// linear scan.
class RegAlloc
{
public:
   RegAlloc(Program *program) : prog(program), sequence(0) { }

   bool exec();
   bool execFunc();

private:
   // join values connected by instructions selected by the JOIN_MASK_* bits
   bool coalesceValues(unsigned int mask);
   bool linearScan();
   bool allocateConstrainedValues();

private:
   // Replaces phi sources with fresh values copied in the predecessors.
   class PhiMovesPass : public Pass {
   private:
      virtual bool visit(BasicBlock *);
      inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
   };

   // Computes the live interval of every value.
   class BuildIntervalsPass : public Pass {
   private:
      virtual bool visit(BasicBlock *);
      void collectLiveValues(BasicBlock *);
      void addLiveRange(Value *, const BasicBlock *, int end);
   };

   // Marks source/def groups that must live in consecutive registers.
   class InsertConstraintsPass : public Pass {
   public:
      bool exec(Function *func);
   private:
      virtual bool visit(BasicBlock *);

      bool insertConstraintMoves();

      void addHazard(Instruction *i, const ValueRef *src);
      void textureMask(TexInstruction *);
      void addConstraint(Instruction *, int s, int n);
      bool detectConflict(Instruction *, int s);

      DLList constrList; // OP_CONSTRAINT instructions created by this pass
   };

   bool buildLiveSets(BasicBlock *);
   void collectLValues(DLList&, bool assignedOnly);

   void insertOrderedTail(DLList&, Value *);
   inline Instruction *insnBySerial(int);

private:
   Program *prog;
   Function *func; // function currently being processed

   // instructions in control flow / chronological order
   ArrayList insns;

   int sequence; // for manual passes through CFG
};
248
249 Instruction *
250 RegAlloc::insnBySerial(int serial)
251 {
252 return reinterpret_cast<Instruction *>(insns.get(serial));
253 }
254
// Extend @val's live interval by [begin, end), where begin is the serial
// of the defining instruction if it lies inside @bb, or the block entry's
// serial otherwise (value defined in a predecessor / live-through).
void
RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
                                           const BasicBlock *bb,
                                           int end)
{
   Instruction *insn = val->getUniqueInsn();

   if (!insn)
      return;
   assert(bb->getFirst()->serial <= bb->getExit()->serial);
   assert(bb->getExit()->serial + 1 >= end);

   int begin = insn->serial;
   if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
      begin = bb->getEntry()->serial;

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
            val->id, begin, insn->serial, end);

   if (begin != end) // empty ranges are only added as hazards for fixed regs
      val->livei.extend(begin, end);
}
277
278 bool
279 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
280 {
281 if (b->cfg.incidentCount() <= 1)
282 return false;
283
284 int n = 0;
285 for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
286 if (ei.getType() == Graph::Edge::TREE ||
287 ei.getType() == Graph::Edge::FORWARD)
288 ++n;
289 return (n == 2);
290 }
291
// For each operand of each PHI in b, generate a new value by inserting a MOV
// at the end of the block it is coming from and replace the operand with its
// result. This eliminates liveness conflicts and enables us to let values be
// copied to the right register if such a conflict exists nonetheless.
//
// These MOVs are also crucial in making sure the live intervals of phi srces
// are extended until the end of the loop, since they are not included in the
// live-in sets.
bool
RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
{
   Instruction *phi, *mov;
   BasicBlock *pb, *pn;

   // first, if a predecessor branches two ways into bb, split the edge by
   // inserting an empty block so the MOVs have a place to live
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      pb = pn = BasicBlock::get(ei.getNode());
      assert(pb);

      if (needNewElseBlock(bb, pb)) {
         pn = new BasicBlock(func);

         // deletes an edge, iterator is invalid after this:
         pb->cfg.detach(&bb->cfg);
         pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
         pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); // XXX: check order !

         // redirect the branch target of pb to the new block
         assert(pb->getExit()->op != OP_CALL);
         if (pb->getExit()->asFlow()->target.bb == bb)
            pb->getExit()->asFlow()->target.bb = pn;
         break;
      }
   }

   // insert MOVs (phi->src[j] should stem from j-th in-BB)
   int j = 0;
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      pb = BasicBlock::get(ei.getNode());
      if (!pb->isTerminated())
         pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));

      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
         mov = new_Instruction(func, OP_MOV, TYPE_U32);

         mov->setSrc(0, phi->getSrc(j));
         mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
         phi->setSrc(j, mov->getDef(0));

         // place the copy just before the predecessor's terminator
         pb->insertBefore(pb->getExit(), mov);
      }
      ++j;
   }

   return true;
}
346
// Build the set of live-in variables of bb: recursively compute the
// live-ins of all successors first (recursion guarded by @sequence so each
// block is visited once per pass), union them to get bb's live-outs, then
// propagate backwards through bb's instructions (defs kill, uses gen).
bool
RegAlloc::buildLiveSets(BasicBlock *bb)
{
   BasicBlock *bn;
   Instruction *i;
   unsigned int s, d;

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());

   bb->liveSet.allocate(func->allLValues.getSize(), false);

   // union of the successors' live-in sets = bb's live-outs
   int n = 0;
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      bn = BasicBlock::get(ei.getNode());
      if (bn == bb)
         continue;
      if (bn->cfg.visit(sequence))
         if (!buildLiveSets(bn))
            return false;
      if (n++ == 0)
         bb->liveSet = bn->liveSet;
      else
         bb->liveSet |= bn->liveSet;
   }
   if (!n && !bb->liveSet.marker)
      bb->liveSet.fill(0);
   bb->liveSet.marker = true;

   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
      INFO("BB:%i live set of out blocks:\n", bb->getId());
      bb->liveSet.print();
   }

   // if (!bb->getEntry())
   //   return true;

   // backward sweep: clear defined values, set used LValues
   for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
      for (d = 0; i->defExists(d); ++d)
         bb->liveSet.clr(i->getDef(d)->id);
      for (s = 0; i->srcExists(s); ++s)
         if (i->getSrc(s)->asLValue())
            bb->liveSet.set(i->getSrc(s)->id);
   }
   // phi defs are not live-in either
   for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
      bb->liveSet.clr(i->getDef(0)->id);

   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
      INFO("BB:%i live set after propagation:\n", bb->getId());
      bb->liveSet.print();
   }

   return true;
}
401
// Initialize bb->liveSet to the union of the live sets of all (non-dummy)
// successors, i.e. the live-outs of bb. Reachable blocks without
// successors start with an empty set.
void
RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
{
   BasicBlock *bbA = NULL, *bbB = NULL;

   assert(bb->cfg.incidentCount() || bb->liveSet.popCount() == 0);

   if (bb->cfg.outgoingCount()) {
      // trickery to save a loop of OR'ing liveSets
      // aliasing works fine with BitSet::setOr
      // (bbB trails one edge behind; from the 2nd pair on, results
      // accumulate directly into bb->liveSet)
      for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
         if (ei.getType() == Graph::Edge::DUMMY)
            continue;
         if (bbA) {
            bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
            bbA = bb;
         } else {
            bbA = bbB;
         }
         bbB = BasicBlock::get(ei.getNode());
      }
      bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
   } else
   if (bb->cfg.incidentCount()) {
      bb->liveSet.fill(0);
   }
}
429
// Build live intervals inside bb: start from the live-outs, fix up phi
// sources of successor blocks, then sweep the instructions backwards,
// ending ranges at definitions and opening them at last uses.
bool
RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
{
   collectLiveValues(bb);

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());

   // go through out blocks and delete phi sources that do not originate from
   // the current block from the live set
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      BasicBlock *out = BasicBlock::get(ei.getNode());

      for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
         bb->liveSet.clr(i->getDef(0)->id);

         for (int s = 0; s < NV50_IR_MAX_SRCS && i->src[s].exists(); ++s) {
            assert(i->src[s].getInsn());
            if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
               bb->liveSet.set(i->getSrc(s)->id);
            else
               bb->liveSet.clr(i->getSrc(s)->id);
         }
      }
   }

   // remaining live-outs are live until end
   if (bb->getExit()) {
      for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
         if (bb->liveSet.test(j))
            addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
   }

   // backward sweep: a def kills liveness (and leaves a 1-point hazard for
   // fixed registers); the first use seen going backwards ends a range
   for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
      for (int d = 0; i->defExists(d); ++d) {
         bb->liveSet.clr(i->getDef(d)->id);
         if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
            i->getDef(d)->livei.extend(i->serial, i->serial);
      }

      for (int s = 0; i->srcExists(s); ++s) {
         if (!i->getSrc(s)->asLValue())
            continue;
         if (!bb->liveSet.test(i->getSrc(s)->id)) {
            bb->liveSet.set(i->getSrc(s)->id);
            addLiveRange(i->getSrc(s), bb, i->serial);
         }
      }
   }

   return true;
}
481
// Join values that should share a register, for the instruction kinds
// selected by @mask: phi defs with their sources, union/constraint/texture
// defs with their sources, and plain MOVs where legal.
bool
RegAlloc::coalesceValues(unsigned int mask)
{
   int c, n;

   for (n = 0; n < insns.getSize(); ++n) {
      Instruction *i;
      Instruction *insn = insnBySerial(n);

      switch (insn->op) {
      case OP_PHI:
         if (!(mask & JOIN_MASK_PHI))
            break;
         // phi coalescing must succeed (moves were inserted beforehand)
         for (c = 0; insn->srcExists(c); ++c)
            if (!insn->getDef(0)->coalesce(insn->getSrc(c), false)) {
               ERROR("failed to coalesce phi operands\n");
               return false;
            }
         break;
      case OP_UNION:
         if (!(mask & JOIN_MASK_UNION))
            break;
         for (c = 0; insn->srcExists(c); ++c)
            insn->getDef(0)->coalesce(insn->getSrc(c), true);
         break;
      case OP_CONSTRAINT:
         if (!(mask & JOIN_MASK_CONSTRAINT))
            break;
         for (c = 0; c < 4 && insn->srcExists(c); ++c)
            insn->getDef(c)->coalesce(insn->getSrc(c), true);
         break;
      case OP_MOV:
         if (!(mask & JOIN_MASK_MOV))
            break;
         i = insn->getDef(0)->uses ? insn->getDef(0)->uses->getInsn() : NULL;
         // if this is a constraint-move there will only be a single use
         if (i && i->op == OP_CONSTRAINT)
            break;
         i = insn->getSrc(0)->getUniqueInsn();
         if (i && !i->constrainedDefs())
            insn->getDef(0)->coalesce(insn->getSrc(0), false);
         break;
      case OP_TEX:
      case OP_TXB:
      case OP_TXL:
      case OP_TXF:
      case OP_TXQ:
      case OP_TXD:
      case OP_TXG:
      case OP_TEXCSAA:
         if (!(mask & JOIN_MASK_TEX))
            break;
         // texture sources and defs share registers on some chips
         for (c = 0; c < 4 && insn->srcExists(c); ++c)
            insn->getDef(c)->coalesce(insn->getSrc(c), true);
         break;
      default:
         break;
      }
   }
   return true;
}
543
544 void
545 RegAlloc::insertOrderedTail(DLList &list, Value *val)
546 {
547 // we insert the live intervals in order, so this should be short
548 DLList::Iterator iter = list.revIterator();
549 const int begin = val->livei.begin();
550 for (; !iter.end(); iter.next()) {
551 if (reinterpret_cast<Value *>(iter.get())->livei.begin() <= begin)
552 break;
553 }
554 iter.insert(val);
555 }
556
557 static void
558 checkList(DLList &list)
559 {
560 Value *prev = NULL;
561 Value *next = NULL;
562
563 for (DLList::Iterator iter = list.iterator(); !iter.end(); iter.next()) {
564 next = Value::get(iter);
565 assert(next);
566 if (prev) {
567 assert(prev->livei.begin() <= next->livei.begin());
568 }
569 assert(next->join == next);
570 prev = next;
571 }
572 }
573
574 void
575 RegAlloc::collectLValues(DLList &list, bool assignedOnly)
576 {
577 for (int n = 0; n < insns.getSize(); ++n) {
578 Instruction *i = insnBySerial(n);
579
580 for (int d = 0; i->defExists(d); ++d)
581 if (!i->getDef(d)->livei.isEmpty())
582 if (!assignedOnly || i->getDef(d)->reg.data.id >= 0)
583 insertOrderedTail(list, i->getDef(d));
584 }
585 checkList(list);
586 }
587
// Allocate vector (multi-def) values before the general linear scan:
// for each component build the set of registers blocked by overlapping
// already-assigned values, restrict each component to the register ids it
// could actually occupy inside an aligned vector, merge the sets and
// assign the whole vector at once.
bool
RegAlloc::allocateConstrainedValues()
{
   Value *defs[4];
   RegisterSet regSet[4];
   DLList regVals;

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: allocating constrained values\n");

   collectLValues(regVals, true);

   for (int c = 0; c < 4; ++c)
      regSet[c].init(prog->getTarget());

   for (int n = 0; n < insns.getSize(); ++n) {
      Instruction *i = insnBySerial(n);

      const int vecSize = i->defCount(0xf);
      if (vecSize < 2)
         continue;
      assert(vecSize <= 4);

      // work on the coalesced representatives
      for (int c = 0; c < vecSize; ++c)
         defs[c] = i->def[c].rep();

      // already assigned (then all components must be)
      if (defs[0]->reg.data.id >= 0) {
         for (int c = 1; c < vecSize; ++c) {
            assert(defs[c]->reg.data.id >= 0);
         }
         continue;
      }

      for (int c = 0; c < vecSize; ++c) {
         uint32_t mask;
         regSet[c].reset();

         // block the registers of values live at the same time as component c
         for (DLList::Iterator it = regVals.iterator(); !it.end(); it.next()) {
            Value *rVal = Value::get(it);
            if (rVal->reg.data.id >= 0 && rVal->livei.overlaps(defs[c]->livei))
               regSet[c].occupy(rVal);
         }
         mask = 0x11111111;
         if (vecSize == 2) // granularity is 2 instead of 4
            mask |= 0x11111111 << 2;
         // keep only conflicts at ids component c could be placed on
         // (ids congruent to c modulo the vector alignment)
         regSet[c].periodicMask(defs[0]->reg.file, 0, ~(mask << c));

         if (!defs[c]->livei.isEmpty())
            insertOrderedTail(regVals, defs[c]);
      }
      // merge the per-component conflict sets and assign the vector
      for (int c = 1; c < vecSize; ++c)
         regSet[0].intersect(defs[0]->reg.file, &regSet[c]);

      if (!regSet[0].assign(&defs[0], vecSize)) // TODO: spilling
         return false;
   }
   for (int c = 0; c < 4; c += 2)
      if (regSet[c].getMaxAssigned(FILE_GPR) > prog->maxGPR)
         prog->maxGPR = regSet[c].getMaxAssigned(FILE_GPR);
   return true;
}
648
// Linear-scan register assignment over values ordered by interval start.
// active = values live at the current position, inactive = values whose
// interval has a hole here. 'free' tracks registers of active values;
// 'f' additionally blocks overlapping inactive and fixed unhandled values.
bool
RegAlloc::linearScan()
{
   Value *cur, *val;
   DLList unhandled, active, inactive;
   RegisterSet f(prog->getTarget()), free(prog->getTarget());

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: linear scan\n");

   collectLValues(unhandled, false);

   for (DLList::Iterator cI = unhandled.iterator(); !cI.end();) {
      cur = Value::get(cI);
      cI.erase();

      // expire ended intervals, deactivate those with a hole at cur's start
      for (DLList::Iterator aI = active.iterator(); !aI.end();) {
         val = Value::get(aI);
         if (val->livei.end() <= cur->livei.begin()) {
            free.release(val);
            aI.erase();
         } else
         if (!val->livei.contains(cur->livei.begin())) {
            free.release(val);
            aI.moveToList(inactive);
         } else {
            aI.next();
         }
      }

      // expire ended inactive intervals, reactivate those live again
      for (DLList::Iterator iI = inactive.iterator(); !iI.end();) {
         val = Value::get(iI);
         if (val->livei.end() <= cur->livei.begin()) {
            iI.erase();
         } else
         if (val->livei.contains(cur->livei.begin())) {
            free.occupy(val);
            iI.moveToList(active);
         } else {
            iI.next();
         }
      }
      f = free;

      // block registers of inactive values whose interval overlaps cur's
      for (DLList::Iterator iI = inactive.iterator(); !iI.end(); iI.next()) {
         val = Value::get(iI);
         if (val->livei.overlaps(cur->livei))
            f.occupy(val);
      }

      // block registers of fixed (pre-assigned) values not yet handled
      for (DLList::Iterator uI = unhandled.iterator(); !uI.end(); uI.next()) {
         val = Value::get(uI);
         if (val->reg.data.id >= 0 && val->livei.overlaps(cur->livei))
            f.occupy(val);
      }

      if (cur->reg.data.id < 0) {
         bool spill = !f.assign(&cur, 1);
         if (spill) {
            // spilling is not implemented yet
            ERROR("out of registers of file %u\n", cur->reg.file);
            abort();
         }
      }
      free.occupy(cur);
      active.insert(cur);
   }

   if (f.getMaxAssigned(FILE_GPR) > prog->maxGPR)
      prog->maxGPR = f.getMaxAssigned(FILE_GPR);
   if (free.getMaxAssigned(FILE_GPR) > prog->maxGPR)
      prog->maxGPR = free.getMaxAssigned(FILE_GPR);
   return true;
}
721
722 bool
723 RegAlloc::exec()
724 {
725 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
726 !fi.end(); fi.next()) {
727 func = reinterpret_cast<Function *>(fi.get());
728 if (!execFunc())
729 return false;
730 }
731 return true;
732 }
733
// Full allocation pipeline for one function: insert constraints, insert
// phi moves, compute live sets and intervals, coalesce, then allocate
// constrained values and run the linear scan. Live intervals are cleared
// on exit regardless of success.
bool
RegAlloc::execFunc()
{
   InsertConstraintsPass insertConstr;
   PhiMovesPass insertMoves;
   BuildIntervalsPass buildIntervals;

   unsigned int i;
   bool ret;

   ret = insertConstr.exec(func);
   if (!ret)
      goto out;

   ret = insertMoves.run(func);
   if (!ret)
      goto out;

   // repeat the live set computation once per loop nesting level so
   // liveness propagates around back edges
   for (sequence = func->cfg.nextSequence(), i = 0;
        ret && i <= func->loopNestingBound;
        sequence = func->cfg.nextSequence(), ++i)
      ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
   if (!ret)
      goto out;

   func->orderInstructions(this->insns);

   ret = buildIntervals.run(func);
   if (!ret)
      goto out;

   ret = coalesceValues(JOIN_MASK_PHI);
   if (!ret)
      goto out;
   // which extra value groups may be joined is chip generation dependent
   switch (prog->getTarget()->getChipset() & 0xf0) {
   case 0x50:
      ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_TEX);
      break;
   case 0xc0:
      ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_CONSTRAINT);
      break;
   default:
      break;
   }
   if (!ret)
      goto out;
   ret = coalesceValues(JOIN_MASK_MOV);
   if (!ret)
      goto out;

   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
      func->print();
      func->printLiveIntervals();
   }

   ret = allocateConstrainedValues() && linearScan();
   if (!ret)
      goto out;

out:
   // TODO: should probably call destructor on LValues later instead
   for (ArrayList::Iterator it = func->allLValues.iterator();
        !it.end(); it.next())
      reinterpret_cast<LValue *>(it.get())->livei.clear();

   return ret;
}
801
802 bool Program::registerAllocation()
803 {
804 RegAlloc ra(this);
805 return ra.exec();
806 }
807
808 bool
809 RegAlloc::InsertConstraintsPass::exec(Function *ir)
810 {
811 constrList.clear();
812
813 bool ret = run(ir, true, true);
814 if (ret)
815 ret = insertConstraintMoves();
816 return ret;
817 }
818
// TODO: make part of texture insn
// Drop components without uses from a texture instruction's write mask and
// compact the remaining defs to the front.
void
RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
{
   Value *def[4];
   int c, k, d;
   uint8_t mask = 0;

   // c walks the declared mask components, k the existing defs,
   // d collects the defs that are actually used
   for (d = 0, k = 0, c = 0; c < 4; ++c) {
      if (!(tex->tex.mask & (1 << c)))
         continue;
      if (tex->getDef(k)->refCount()) {
         mask |= 1 << c;
         def[d++] = tex->getDef(k);
      }
      ++k;
   }
   tex->tex.mask = mask;

#if 0 // reorder or set the unused ones NULL ?
   for (c = 0; c < 4; ++c)
      if (!(tex->tex.mask & (1 << c)))
         def[d++] = tex->getDef(c);
#endif
   for (c = 0; c < d; ++c)
      tex->setDef(c, def[c]);
#if 1
   // clear the trailing def slots that are no longer written
   for (; c < 4; ++c)
      tex->setDef(c, NULL);
#endif
}
850
851 bool
852 RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
853 {
854 // current register allocation can't handle it if a value participates in
855 // multiple constraints
856 for (ValueRef::Iterator it = cst->src[s].iterator(); !it.end(); it.next()) {
857 Instruction *insn = it.get()->getInsn();
858 if (insn != cst)
859 return true;
860 }
861
862 // can start at s + 1 because detectConflict is called on all sources
863 for (int c = s + 1; cst->srcExists(c); ++c)
864 if (cst->getSrc(c) == cst->getSrc(s))
865 return true;
866
867 Instruction *defi = cst->getSrc(s)->getInsn();
868
869 return (!defi || defi->constrainedDefs());
870 }
871
// Force sources [s, s + n) of @i into consecutive registers by routing
// them through an OP_CONSTRAINT instruction; an identical constraint in a
// dominating block is reused instead of creating a new one.
void
RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
{
   Instruction *cst;
   int d;

   // first, look for an existing identical constraint op
   for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
      cst = reinterpret_cast<Instruction *>(it.get());
      if (!i->bb->dominatedBy(cst->bb))
         break;
      for (d = 0; d < n; ++d)
         if (cst->getSrc(d) != i->getSrc(d + s))
            break;
      if (d >= n) { // all sources match: reuse its defs
         for (d = 0; d < n; ++d, ++s)
            i->setSrc(s, cst->getDef(d));
         return;
      }
   }
   cst = new_Instruction(func, OP_CONSTRAINT, i->dType);

   // route each source through a fresh GPR def of the constraint
   for (d = 0; d < n; ++s, ++d) {
      cst->setDef(d, new_LValue(func, FILE_GPR));
      cst->setSrc(d, i->getSrc(s));
      i->setSrc(s, cst->getDef(d));
   }
   i->bb->insertBefore(i, cst);

   constrList.insert(cst);
}
903
904 // Add a dummy use of the pointer source of >= 8 byte loads after the load
905 // to prevent it from being assigned a register which overlapping the load's
906 // destination, which would produce random corruptions.
907 void
908 RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
909 {
910 Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
911 hzd->setSrc(0, src->get());
912 i->bb->insertAfter(i, hzd);
913
914 }
915
// Insert constraint markers for instructions whose multiple sources must be
// located in consecutive registers: texture coordinates/arguments, the
// values of wide exports/stores, and add hazards for wide indirect loads.
bool
RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
{
   TexInstruction *tex;
   Instruction *next;
   int s, n, size;

   for (Instruction *i = bb->getEntry(); i; i = next) {
      next = i->next; // i may get new neighbours; remember the successor

      if ((tex = i->asTex())) {
         textureMask(tex);

         // FIXME: this is target specific
         if (tex->op == OP_TXQ) {
            s = tex->srcCount(0xff);
            n = 0;
         } else {
            // s = number of coordinate-like sources, n = remaining values
            s = tex->tex.target.getArgCount();
            if (!tex->tex.target.isArray() &&
                (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
               ++s;
            if (tex->op == OP_TXD && tex->tex.useOffsets)
               ++s;
            n = tex->srcCount(0xff) - s;
            assert(n <= 4);
         }

         if (s > 1)
            addConstraint(i, 0, s);
         if (n > 1)
            addConstraint(i, s, n);
      } else
      if (i->op == OP_EXPORT || i->op == OP_STORE) {
         // sources 1.. carry the stored value; count how many cover dType
         for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
            assert(i->srcExists(s));
            size -= i->getSrc(s)->reg.size;
         }
         if ((s - 1) > 1)
            addConstraint(i, 1, s - 1);
      } else
      if (i->op == OP_LOAD) {
         if (i->src[0].isIndirect(0) && typeSizeof(i->dType) >= 8)
            addHazard(i, i->src[0].getIndirect(0));
      }
   }
   return true;
}
966
967 // Insert extra moves so that, if multiple register constraints on a value are
968 // in conflict, these conflicts can be resolved.
969 bool
970 RegAlloc::InsertConstraintsPass::insertConstraintMoves()
971 {
972 for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
973 Instruction *cst = reinterpret_cast<Instruction *>(it.get());
974
975 for (int s = 0; cst->srcExists(s); ++s) {
976 if (!detectConflict(cst, s))
977 continue;
978 Instruction *mov = new_Instruction(func, OP_MOV,
979 typeOfSize(cst->src[s].getSize()));
980 mov->setSrc(0, cst->getSrc(s));
981 mov->setDef(0, new_LValue(func, FILE_GPR));
982 cst->setSrc(s, mov->getDef(0));
983
984 cst->bb->insertBefore(cst, mov);
985 }
986 }
987 return true;
988 }
989
990 } // namespace nv50_ir