2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "nv50_ir_target.h"
26 #include "nv50/nv50_debug.h"
30 #define MAX_REGISTER_FILE_SIZE 256
// RegisterSet: occupancy tracker for the target's register files.
// NOTE(review): this extract is garbled — original line numbers are embedded
// as stray leading tokens and interior lines (class header, access
// specifiers, some bodies) are elided; verify against upstream before edits.
36 RegisterSet(const Target
*);
38 void init(const Target
*);
39 void reset(); // reset allocation status, but not max assigned regs
// Apply a 32-bit pattern repeating every 32 registers: set @lock bits
// occupied, clear @unlock bits (used for vector alignment constraints).
41 void periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
);
// Merge another set's occupancy into this one for file f (ORs bits; the
// intersection of *free* registers — see the implementation below).
42 void intersect(DataFile f
, const RegisterSet
*);
// Find and assign contiguous free registers for nr values; release/occupy
// manage single values at their already-chosen ids.
44 bool assign(Value
**, int nr
);
45 void release(const Value
*);
46 bool occupy(const Value
*);
// Highest register index handed out so far for file f (tracked in fill[]).
48 int getMaxAssigned(DataFile f
) const { return fill
[f
]; }
// One occupancy bit per register, per register file.
53 uint32_t bits
[FILE_ADDRESS
+ 1][(MAX_REGISTER_FILE_SIZE
+ 31) / 32];
55 int unit
[FILE_ADDRESS
+ 1]; // log2 of allocation granularity
// last[]: index of the last usable register per file (file size - 1).
57 int last
[FILE_ADDRESS
+ 1];
// fill[]: highest register index assigned so far per file.
58 int fill
[FILE_ADDRESS
+ 1];
// reset() body fragment: clear all occupancy bits.
64 memset(bits
, 0, sizeof(bits
));
// Default constructor (body elided in this extract).
67 RegisterSet::RegisterSet()
// init(): query per-file register count and allocation unit from the target.
73 RegisterSet::init(const Target
*targ
)
75 for (unsigned int rf
= 0; rf
<= FILE_ADDRESS
; ++rf
) {
76 DataFile f
= static_cast<DataFile
>(rf
);
// last usable register index = file size - 1
77 last
[rf
] = targ
->getFileSize(f
) - 1;
78 unit
[rf
] = targ
->getFileUnit(f
);
// every file must fit the statically-sized bit arrays
80 assert(last
[rf
] < MAX_REGISTER_FILE_SIZE
);
// Target-taking constructor (body elided — presumably calls init(targ)).
84 RegisterSet::RegisterSet(const Target
*targ
)
91 RegisterSet::periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
)
93 for (int i
= 0; i
< (last
[f
] + 31) / 32; ++i
)
94 bits
[f
][i
] = (bits
[f
][i
] | lock
) & ~unlock
;
98 RegisterSet::intersect(DataFile f
, const RegisterSet
*set
)
100 for (int i
= 0; i
< (last
[f
] + 31) / 32; ++i
)
101 bits
[f
][i
] |= set
->bits
[f
][i
];
// Debug dump of the GPR occupancy words (leading/trailing INFO lines elided
// in this extract).
105 RegisterSet::print() const
108 for (int i
= 0; i
< (last
[FILE_GPR
] + 31) / 32; ++i
)
109 INFO(" %08x", bits
[FILE_GPR
][i
]);
// assign(): find a run of free registers large enough for nr values of
// def[0]'s size and record the chosen id on each def. Control-flow lines
// (braces, continue/return paths) are elided in this extract.
114 RegisterSet::assign(Value
**def
, int nr
)
116 DataFile f
= def
[0]->reg
.file
;
// s = span in allocation units; m = contiguous bitmask of that span.
// NOTE(review): 'n' here comes from an elided line — presumably nr.
120 int s
= (n
* def
[0]->reg
.size
) >> unit
[f
];
121 uint32_t m
= (1 << s
) - 1;
// id starts out-of-range; only valid if the search below finds a slot.
123 int id
= last
[f
] + 1;
// scan 32-register words for a free aligned span
126 for (i
= 0; (i
* 32) < last
[f
]; ++i
) {
127 if (bits
[f
][i
] == 0xffffffff)
130 for (id
= 0; id
< 32; id
+= s
)
131 if (!(bits
[f
][i
] & (m
<< id
)))
// mark the chosen span occupied
140 bits
[f
][id
/ 32] |= m
<< (id
% 32);
// keep fill[f] = highest assigned register index
142 if (id
+ (s
- 1) > fill
[f
])
143 fill
[f
] = id
+ (s
- 1);
// hand out consecutive ids to the defs
145 for (i
= 0; i
< nr
; ++i
, ++id
)
146 if (!def
[i
]->livei
.isEmpty()) // XXX: really increased id if empty ?
147 def
[i
]->reg
.data
.id
= id
;
// occupy(): mark val's pre-assigned register span occupied. Fails (elided
// return) if val has no id yet or the span is already taken. Tail of the
// function (orig. lines 165+) is elided in this extract.
152 RegisterSet::occupy(const Value
*val
)
154 int id
= val
->reg
.data
.id
;
155 unsigned int f
= val
->reg
.file
;
// m = bitmask covering the value's size in allocation units
157 uint32_t m
= (1 << (val
->reg
.size
>> unit
[f
])) - 1;
// unassigned or conflicting → failure path (return elided)
159 if (id
< 0 || bits
[f
][id
/ 32] & m
<< (id
% 32))
162 INFO_DBG(0, REG_ALLOC
, "reg occupy: %u[%i] %x\n", f
, id
, m
);
164 bits
[f
][id
/ 32] |= m
<< (id
% 32);
// release(): clear the occupancy bits of val's register span (inverse of
// occupy). Guard lines (orig. 176-179) are elided in this extract.
173 RegisterSet::release(const Value
*val
)
175 int id
= val
->reg
.data
.id
;
178 unsigned int f
= val
->reg
.file
;
// same span mask as in occupy()
180 uint32_t m
= (1 << (val
->reg
.size
>> unit
[f
])) - 1;
182 INFO_DBG(0, REG_ALLOC
, "reg release: %u[%i] %x\n", f
, id
, m
);
184 bits
[f
][id
/ 32] &= ~(m
<< (id
% 32));
// Selection masks for coalesceValues(): which kinds of copies to join.
187 #define JOIN_MASK_PHI (1 << 0)
188 #define JOIN_MASK_UNION (1 << 1)
189 #define JOIN_MASK_MOV (1 << 2)
190 #define JOIN_MASK_TEX (1 << 3)
191 #define JOIN_MASK_CONSTRAINT (1 << 4)
// RegAlloc: driver for register allocation (class header elided in this
// extract). Runs constraint insertion, phi/argument lowering, liveness,
// coalescing, then linear-scan assignment.
196 RegAlloc(Program
*program
) : prog(program
), sequence(0) { }
202 bool coalesceValues(unsigned int mask
);
204 bool allocateConstrainedValues();
// Pass: replace phi operands with fresh MOV results in predecessor blocks.
207 class PhiMovesPass
: public Pass
{
209 virtual bool visit(BasicBlock
*);
210 inline bool needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
);
// Pass: pin call arguments/results to the callee's fixed registers.
213 class ArgumentMovesPass
: public Pass
{
215 virtual bool visit(BasicBlock
*);
// Pass: build live intervals from the per-block live sets.
218 class BuildIntervalsPass
: public Pass
{
220 virtual bool visit(BasicBlock
*);
221 void collectLiveValues(BasicBlock
*);
222 void addLiveRange(Value
*, const BasicBlock
*, int end
);
// Pass: add OP_CONSTRAINT markers for sources that must be contiguous.
225 class InsertConstraintsPass
: public Pass
{
227 bool exec(Function
*func
);
229 virtual bool visit(BasicBlock
*);
231 bool insertConstraintMoves();
233 void addHazard(Instruction
*i
, const ValueRef
*src
);
234 void textureMask(TexInstruction
*);
235 void addConstraint(Instruction
*, int s
, int n
);
236 bool detectConflict(Instruction
*, int s
);
// Per-function liveness helpers and value-list utilities.
241 bool buildLiveSets(BasicBlock
*);
242 void collectLValues(DLList
&, bool assignedOnly
);
244 void insertOrderedTail(DLList
&, Value
*);
245 inline Instruction
*insnBySerial(int);
251 // instructions in control flow / chronological order
254 int sequence
; // for manual passes through CFG
258 RegAlloc::insnBySerial(int serial
)
260 return reinterpret_cast<Instruction
*>(insns
.get(serial
));
// addLiveRange(): extend val's live interval inside bb up to (exclusive)
// serial @end. Some guard lines (orig. 269-272) are elided in this extract.
264 RegAlloc::BuildIntervalsPass::addLiveRange(Value
*val
,
265 const BasicBlock
*bb
,
268 Instruction
*insn
= val
->getUniqueInsn();
// fallback when no unique defining insn (guard condition elided)
271 insn
= bb
->getFirst();
273 assert(bb
->getFirst()->serial
<= bb
->getExit()->serial
);
274 assert(bb
->getExit()->serial
+ 1 >= end
);
276 int begin
= insn
->serial
;
// if the def lies outside this block, the range starts at block entry
277 if (begin
< bb
->getEntry()->serial
|| begin
> bb
->getExit()->serial
)
278 begin
= bb
->getEntry()->serial
;
280 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "%%%i <- live range [%i(%i), %i)\n",
281 val
->id
, begin
, insn
->serial
, end
);
283 if (begin
!= end
) // empty ranges are only added as hazards for fixed regs
284 val
->livei
.extend(begin
, end
);
// needNewElseBlock(): decide whether predecessor p of merge block b needs a
// dedicated intermediate block for phi moves. Counting/return lines are
// elided in this extract.
288 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
)
// single-predecessor merge: never needed (return elided)
290 if (b
->cfg
.incidentCount() <= 1)
// count p's structured (TREE/FORWARD) outgoing edges
294 for (Graph::EdgeIterator ei
= p
->cfg
.outgoing(); !ei
.end(); ei
.next())
295 if (ei
.getType() == Graph::Edge::TREE
||
296 ei
.getType() == Graph::Edge::FORWARD
)
301 // For each operand of each PHI in b, generate a new value by inserting a MOV
302 // at the end of the block it is coming from and replace the operand with its
303 // result. This eliminates liveness conflicts and enables us to let values be
304 // copied to the right register if such a conflict exists nonetheless.
306 // These MOVs are also crucial in making sure the live intervals of phi sources
307 // are extended until the end of the loop, since they are not included in the
310 RegAlloc::PhiMovesPass::visit(BasicBlock
*bb
)
312 Instruction
*phi
, *mov
;
// Pass 1: split critical edges by interposing a new block where needed.
315 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
316 pb
= pn
= BasicBlock::get(ei
.getNode());
319 if (needNewElseBlock(bb
, pb
)) {
320 pn
= new BasicBlock(func
);
322 // deletes an edge, iterator is invalid after this:
323 pb
->cfg
.detach(&bb
->cfg
);
324 pb
->cfg
.attach(&pn
->cfg
, Graph::Edge::TREE
);
325 pn
->cfg
.attach(&bb
->cfg
, Graph::Edge::FORWARD
); // XXX: check order !
327 assert(pb
->getExit()->op
!= OP_CALL
);
// retarget the predecessor's branch to the interposed block
328 if (pb
->getExit()->asFlow()->target
.bb
== bb
)
329 pb
->getExit()->asFlow()->target
.bb
= pn
;
334 // insert MOVs (phi->src(j) should stem from j-th in-BB)
// Pass 2: for each predecessor, copy each phi operand through a fresh MOV.
// (Index bookkeeping lines for 'j' are elided in this extract.)
336 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
337 pb
= BasicBlock::get(ei
.getNode());
338 if (!pb
->isTerminated())
339 pb
->insertTail(new_FlowInstruction(func
, OP_BRA
, bb
));
341 for (phi
= bb
->getPhi(); phi
&& phi
->op
== OP_PHI
; phi
= phi
->next
) {
342 mov
= new_Instruction(func
, OP_MOV
, TYPE_U32
);
344 mov
->setSrc(0, phi
->getSrc(j
));
345 mov
->setDef(0, new_LValue(func
, phi
->getDef(0)->asLValue()));
346 phi
->setSrc(j
, mov
->getDef(0));
348 pb
->insertBefore(pb
->getExit(), mov
);
357 RegAlloc::ArgumentMovesPass::visit(BasicBlock
*bb
)
359 // Bind function call inputs/outputs to the same physical register
360 // the callee uses, inserting moves as appropriate for the case a
// Scan for OP_CALL instructions; everything below operates per call site.
362 for (Instruction
*i
= bb
->getEntry(); i
; i
= i
->next
) {
363 FlowInstruction
*cal
= i
->asFlow();
// skip non-calls and builtins (continue elided in this extract)
364 if (!cal
|| cal
->op
!= OP_CALL
|| cal
->builtin
)
366 RegisterSet
clobberSet(prog
->getTarget());
368 // Bind input values.
369 for (int s
= 0; cal
->srcExists(s
); ++s
) {
// copy of the argument, pinned to the callee's fixed input register id
370 LValue
*tmp
= new_LValue(func
, cal
->getSrc(s
)->asLValue());
371 tmp
->reg
.data
.id
= cal
->target
.fn
->ins
[s
].rep()->reg
.data
.id
;
// MOV arg -> pinned tmp, placed before the call
// (mov declaration/def-setting lines elided in this extract)
374 new_Instruction(func
, OP_MOV
, typeOfSize(tmp
->reg
.size
));
376 mov
->setSrc(0, cal
->getSrc(s
));
379 bb
->insertBefore(cal
, mov
);
382 // Bind output values.
383 for (int d
= 0; cal
->defExists(d
); ++d
) {
// copy of the result, pinned to the callee's fixed output register id
384 LValue
*tmp
= new_LValue(func
, cal
->getDef(d
)->asLValue());
385 tmp
->reg
.data
.id
= cal
->target
.fn
->outs
[d
].rep()->reg
.data
.id
;
// MOV pinned tmp -> result, placed after the call
388 new_Instruction(func
, OP_MOV
, typeOfSize(tmp
->reg
.size
));
390 mov
->setDef(0, cal
->getDef(d
));
393 bb
->insertAfter(cal
, mov
);
// outputs also count as clobbered by the call
394 clobberSet
.occupy(tmp
);
397 // Bind clobbered values.
398 for (std::deque
<Value
*>::iterator it
= cal
->target
.fn
->clobbers
.begin();
399 it
!= cal
->target
.fn
->clobbers
.end();
// add an extra def for each clobbered register not already covered
401 if (clobberSet
.occupy(*it
)) {
402 Value
*tmp
= new_LValue(func
, (*it
)->asLValue());
403 tmp
->reg
.data
.id
= (*it
)->reg
.data
.id
;
404 cal
->setDef(cal
->defCount(), tmp
);
409 // Update the clobber set of the function.
410 if (BasicBlock::get(func
->cfgExit
) == bb
) {
411 func
->buildDefSets();
412 for (unsigned int i
= 0; i
< bb
->defSet
.getSize(); ++i
)
413 if (bb
->defSet
.test(i
))
414 func
->clobbers
.push_back(func
->getLValue(i
));
420 // Build the set of live-in variables of bb.
// Recursive backward dataflow over the CFG: union successors' live-ins,
// then walk bb's instructions bottom-up clearing defs / setting srcs.
422 RegAlloc::buildLiveSets(BasicBlock
*bb
)
424 Function
*f
= bb
->getFunction();
429 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "buildLiveSets(BB:%i)\n", bb
->getId());
431 bb
->liveSet
.allocate(func
->allLValues
.getSize(), false);
// recurse into unvisited successors, then merge their live-ins
434 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
435 bn
= BasicBlock::get(ei
.getNode());
438 if (bn
->cfg
.visit(sequence
))
439 if (!buildLiveSets(bn
))
// first successor: copy; later ones: OR in
442 bb
->liveSet
= bn
->liveSet
;
444 bb
->liveSet
|= bn
->liveSet
;
// NOTE(review): 'n' and 'marker' usage relies on lines elided here.
446 if (!n
&& !bb
->liveSet
.marker
)
448 bb
->liveSet
.marker
= true;
450 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
451 INFO("BB:%i live set of out blocks:\n", bb
->getId());
455 // if (!bb->getEntry())
// function outputs are live-out of the exit block
458 if (bb
== BasicBlock::get(f
->cfgExit
)) {
459 for (std::deque
<ValueRef
>::iterator it
= f
->outs
.begin();
460 it
!= f
->outs
.end(); ++it
) {
461 assert(it
->get()->asLValue());
462 bb
->liveSet
.set(it
->get()->id
);
// bottom-up: kill defs, gen srcs
466 for (i
= bb
->getExit(); i
&& i
!= bb
->getEntry()->prev
; i
= i
->prev
) {
467 for (d
= 0; i
->defExists(d
); ++d
)
468 bb
->liveSet
.clr(i
->getDef(d
)->id
);
469 for (s
= 0; i
->srcExists(s
); ++s
)
470 if (i
->getSrc(s
)->asLValue())
471 bb
->liveSet
.set(i
->getSrc(s
)->id
);
// phi defs are not live-in
473 for (i
= bb
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
)
474 bb
->liveSet
.clr(i
->getDef(0)->id
);
476 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
477 INFO("BB:%i live set after propagation:\n", bb
->getId());
// collectLiveValues(): recompute bb's live-out set as the union of its
// successors' live-ins, pairing blocks to halve the number of OR passes.
// Loop bookkeeping lines (bbA assignment, etc.) are elided in this extract.
485 RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock
*bb
)
487 BasicBlock
*bbA
= NULL
, *bbB
= NULL
;
489 if (bb
->cfg
.outgoingCount()) {
490 // trickery to save a loop of OR'ing liveSets
491 // aliasing works fine with BitSet::setOr
492 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
493 if (ei
.getType() == Graph::Edge::DUMMY
)
496 bb
->liveSet
.setOr(&bbA
->liveSet
, &bbB
->liveSet
);
501 bbB
= BasicBlock::get(ei
.getNode());
// fold in the last (possibly unpaired) successor
503 bb
->liveSet
.setOr(&bbB
->liveSet
, bbA
? &bbA
->liveSet
: NULL
);
// fallback for blocks with predecessors only (body elided)
505 if (bb
->cfg
.incidentCount()) {
// BuildIntervalsPass::visit(): turn bb's live sets into live intervals by
// walking the block bottom-up.
511 RegAlloc::BuildIntervalsPass::visit(BasicBlock
*bb
)
513 collectLiveValues(bb
);
515 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "BuildIntervals(BB:%i)\n", bb
->getId());
517 // go through out blocks and delete phi sources that do not originate from
518 // the current block from the live set
519 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
520 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
522 for (Instruction
*i
= out
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
) {
523 bb
->liveSet
.clr(i
->getDef(0)->id
);
525 for (int s
= 0; i
->srcExists(s
); ++s
) {
526 assert(i
->src(s
).getInsn());
527 if (i
->getSrc(s
)->getUniqueInsn()->bb
== bb
) // XXX: reachableBy ?
528 bb
->liveSet
.set(i
->getSrc(s
)->id
);
530 bb
->liveSet
.clr(i
->getSrc(s
)->id
);
535 // remaining live-outs are live until end
537 for (unsigned int j
= 0; j
< bb
->liveSet
.getSize(); ++j
)
538 if (bb
->liveSet
.test(j
))
539 addLiveRange(func
->getLValue(j
), bb
, bb
->getExit()->serial
+ 1);
// bottom-up over non-phi instructions: end ranges at defs, start at srcs
542 for (Instruction
*i
= bb
->getExit(); i
&& i
->op
!= OP_PHI
; i
= i
->prev
) {
543 for (int d
= 0; i
->defExists(d
); ++d
) {
544 bb
->liveSet
.clr(i
->getDef(d
)->id
);
545 if (i
->getDef(d
)->reg
.data
.id
>= 0) // add hazard for fixed regs
546 i
->getDef(d
)->livei
.extend(i
->serial
, i
->serial
);
549 for (int s
= 0; i
->srcExists(s
); ++s
) {
550 if (!i
->getSrc(s
)->asLValue())
// first (bottom-up) use of a value: range ends at this instruction
552 if (!bb
->liveSet
.test(i
->getSrc(s
)->id
)) {
553 bb
->liveSet
.set(i
->getSrc(s
)->id
);
554 addLiveRange(i
->getSrc(s
), bb
, i
->serial
);
// in the entry block, pin fixed-register function inputs with a tiny range
559 if (bb
== BasicBlock::get(func
->cfg
.getRoot())) {
560 for (std::deque
<ValueDef
>::iterator it
= func
->ins
.begin();
561 it
!= func
->ins
.end(); ++it
) {
562 if (it
->get()->reg
.data
.id
>= 0) // add hazard for fixed regs
563 it
->get()->livei
.extend(0, 1);
// coalesceValues(): join copy-related values selected by the JOIN_MASK_*
// bits in @mask. The per-opcode switch/case lines are elided in this
// extract — each 'if (!(mask & ...))' guards one instruction category.
571 RegAlloc::coalesceValues(unsigned int mask
)
575 for (n
= 0; n
< insns
.getSize(); ++n
) {
577 Instruction
*insn
= insnBySerial(n
)
;
// OP_PHI: all operands must share a register with the def (mandatory)
581 if (!(mask
& JOIN_MASK_PHI
))
583 for (c
= 0; insn
->srcExists(c
); ++c
)
584 if (!insn
->getDef(0)->coalesce(insn
->getSrc(c
), false)) {
585 ERROR("failed to coalesce phi operands\n");
// OP_UNION: best-effort join of all sources with the def
590 if (!(mask
& JOIN_MASK_UNION
))
592 for (c
= 0; insn
->srcExists(c
); ++c
)
593 insn
->getDef(0)->coalesce(insn
->getSrc(c
), true);
// OP_CONSTRAINT: best-effort join of src[c] with def[c]
596 if (!(mask
& JOIN_MASK_CONSTRAINT
))
598 for (c
= 0; c
< 4 && insn
->srcExists(c
); ++c
)
599 insn
->getDef(c
)->coalesce(insn
->getSrc(c
), true);
// OP_MOV: join only when it won't defeat a constraint
602 if (!(mask
& JOIN_MASK_MOV
))
605 if (!insn
->getDef(0)->uses
.empty())
606 i
= insn
->getDef(0)->uses
.front()->getInsn();
607 // if this is a constraint-move there will only be a single use
608 if (i
&& i
->op
== OP_CONSTRAINT
)
610 i
= insn
->getSrc(0)->getUniqueInsn();
611 if (i
&& !i
->constrainedDefs())
612 insn
->getDef(0)->coalesce(insn
->getSrc(0), false);
// texture ops: join up to 4 coord sources with the result regs
622 if (!(mask
& JOIN_MASK_TEX
))
624 for (c
= 0; c
< 4 && insn
->srcExists(c
); ++c
)
625 insn
->getDef(c
)->coalesce(insn
->getSrc(c
), true);
// insertOrderedTail(): insert @val into @list keeping it sorted by live
// interval start; searches backwards from the tail. The actual insert call
// after the loop is elided in this extract.
635 RegAlloc::insertOrderedTail(DLList
&list
, Value
*val
)
637 // we insert the live intervals in order, so this should be short
638 DLList::Iterator iter
= list
.revIterator();
639 const int begin
= val
->livei
.begin();
640 for (; !iter
.end(); iter
.next()) {
641 if (reinterpret_cast<Value
*>(iter
.get())->livei
.begin() <= begin
)
// checkList(): debug helper asserting the list is sorted by interval start
// and contains only representative (join == self) values. 'prev' tracking
// lines are elided in this extract.
648 checkList(DLList
&list
)
653 for (DLList::Iterator iter
= list
.iterator(); !iter
.end(); iter
.next()) {
654 next
= Value::get(iter
);
657 assert(prev
->livei
.begin() <= next
->livei
.begin());
659 assert(next
->join
== next
);
// collectLValues(): gather all defined values with non-empty live intervals
// into @list, sorted by interval start. With @assignedOnly, keep only
// values that already have a fixed register id.
665 RegAlloc::collectLValues(DLList
&list
, bool assignedOnly
)
667 for (int n
= 0; n
< insns
.getSize(); ++n
) {
668 Instruction
*i
= insnBySerial(n
);
670 for (int d
= 0; i
->defExists(d
); ++d
)
671 if (!i
->getDef(d
)->livei
.isEmpty())
672 if (!assignedOnly
|| i
->getDef(d
)->reg
.data
.id
>= 0)
673 insertOrderedTail(list
, i
->getDef(d
));
// allocateConstrainedValues(): pre-assign registers to vector defs that
// must live in consecutive registers, before the general linear scan. Uses
// one RegisterSet per vector component. Several statements (declarations
// of regVals/defs/mask, continue paths) are elided in this extract.
679 RegAlloc::allocateConstrainedValues()
682 RegisterSet regSet
[4];
685 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "RA: allocating constrained values\n");
// already-assigned values become interference sources
687 collectLValues(regVals
, true);
689 for (int c
= 0; c
< 4; ++c
)
690 regSet
[c
].init(prog
->getTarget());
692 for (int n
= 0; n
< insns
.getSize(); ++n
) {
693 Instruction
*i
= insnBySerial(n
);
695 const int vecSize
= i
->defCount(0xf);
698 assert(vecSize
<= 4);
// defs[] = representatives of the coalesced vector components
700 for (int c
= 0; c
< vecSize
; ++c
)
701 defs
[c
] = i
->def(c
).rep();
// already fully assigned: just sanity-check the remaining components
703 if (defs
[0]->reg
.data
.id
>= 0) {
704 for (int c
= 1; c
< vecSize
; ++c
) {
705 assert(defs
[c
]->reg
.data
.id
>= 0);
710 for (int c
= 0; c
< vecSize
; ++c
) {
// block out registers of overlapping, already-assigned values
714 for (DLList::Iterator it
= regVals
.iterator(); !it
.end(); it
.next()) {
715 Value
*rVal
= Value::get(it
);
716 if (rVal
->reg
.data
.id
>= 0 && rVal
->livei
.overlaps(defs
[c
]->livei
))
717 regSet
[c
].occupy(rVal
);
720 if (vecSize
== 2) // granularity is 2 instead of 4
721 mask
|= 0x11111111 << 2;
// restrict component c to every vecSize-th register slot
722 regSet
[c
].periodicMask(defs
[0]->reg
.file
, 0, ~(mask
<< c
));
724 if (!defs
[c
]->livei
.isEmpty())
725 insertOrderedTail(regVals
, defs
[c
]);
// combine all components' interference into regSet[0]
// NOTE(review): ', ®Set' below is mojibake for ', &regSet' (encoding
// artifact of this extract).
727 for (int c
= 1; c
< vecSize
; ++c
)
728 regSet
[0].intersect(defs
[0]->reg
.file
, ®Set
[c
]);
730 if (!regSet
[0].assign(&defs
[0], vecSize
)) // TODO: spilling
// record the highest GPR used by any component set
733 for (int c
= 0; c
< 4; c
+= 2)
734 if (regSet
[c
].getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
735 prog
->maxGPR
= regSet
[c
].getMaxAssigned(FILE_GPR
);
// linearScan(): classic linear-scan register allocation over the sorted
// value list (unhandled -> active/inactive), with interference from fixed
// registers. Loop-advance and assignment lines are elided in this extract.
740 RegAlloc::linearScan()
743 DLList unhandled
, active
, inactive
;
744 RegisterSet
f(prog
->getTarget()), free(prog
->getTarget());
746 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "RA: linear scan\n");
748 collectLValues(unhandled
, false);
750 for (DLList::Iterator cI
= unhandled
.iterator(); !cI
.end();) {
751 cur
= Value::get(cI
);
// expire or deactivate intervals that ended / have a hole at cur's start
754 for (DLList::Iterator aI
= active
.iterator(); !aI
.end();) {
755 val
= Value::get(aI
);
756 if (val
->livei
.end() <= cur
->livei
.begin()) {
760 if (!val
->livei
.contains(cur
->livei
.begin())) {
762 aI
.moveToList(inactive
);
// reactivate or expire inactive intervals
768 for (DLList::Iterator iI
= inactive
.iterator(); !iI
.end();) {
769 val
= Value::get(iI
);
770 if (val
->livei
.end() <= cur
->livei
.begin()) {
773 if (val
->livei
.contains(cur
->livei
.begin())) {
775 iI
.moveToList(active
);
// occupy registers of overlapping inactive values
782 for (DLList::Iterator iI
= inactive
.iterator(); !iI
.end(); iI
.next()) {
783 val
= Value::get(iI
);
784 if (val
->livei
.overlaps(cur
->livei
))
// occupy fixed registers of overlapping not-yet-handled values
788 for (DLList::Iterator uI
= unhandled
.iterator(); !uI
.end(); uI
.next()) {
789 val
= Value::get(uI
);
790 if (val
->reg
.data
.id
>= 0 && val
->livei
.overlaps(cur
->livei
))
// pick a register for cur if it does not have one yet
794 if (cur
->reg
.data
.id
< 0) {
795 bool spill
= !f
.assign(&cur
, 1);
// TODO: spilling not implemented — failure is fatal here
797 ERROR("out of registers of file %u\n", cur
->reg
.file
);
// track the highest GPR handed out by either set
805 if (f
.getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
806 prog
->maxGPR
= f
.getMaxAssigned(FILE_GPR
);
807 if (free
.getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
808 prog
->maxGPR
= free
.getMaxAssigned(FILE_GPR
);
// RegAlloc::exec fragment: iterate functions in (reverse) call-graph DFS
// order so callees are allocated before callers. Surrounding lines
// (signature, per-function invocation, return) are elided in this extract.
815 for (IteratorRef it
= prog
->calls
.iteratorDFS(false);
816 !it
->end(); it
->next()) {
817 func
= Function::get(reinterpret_cast<Graph::Node
*>(it
->get()));
// Per-function allocation pipeline fragment: constraints -> phi moves ->
// argument moves -> live sets -> intervals -> coalescing -> assignment.
// Surrounding signature and 'if (ret)' chaining lines are elided.
827 InsertConstraintsPass insertConstr
;
828 PhiMovesPass insertPhiMoves
;
829 ArgumentMovesPass insertArgMoves
;
830 BuildIntervalsPass buildIntervals
;
835 ret
= insertConstr
.exec(func
);
839 ret
= insertPhiMoves
.run(func
);
843 ret
= insertArgMoves
.run(func
, true);
// iterate live-set construction once per loop nesting level to reach a
// fixed point across back edges
847 for (sequence
= func
->cfg
.nextSequence(), i
= 0;
848 ret
&& i
<= func
->loopNestingBound
;
849 sequence
= func
->cfg
.nextSequence(), ++i
)
850 ret
= buildLiveSets(BasicBlock::get(func
->cfg
.getRoot()));
854 func
->orderInstructions(this->insns
);
856 ret
= buildIntervals
.run(func
);
860 ret
= coalesceValues(JOIN_MASK_PHI
);
// chipset-dependent choice of which copy kinds to coalesce
863 switch (prog
->getTarget()->getChipset() & 0xf0) {
865 ret
= coalesceValues(JOIN_MASK_UNION
| JOIN_MASK_TEX
);
868 ret
= coalesceValues(JOIN_MASK_UNION
| JOIN_MASK_CONSTRAINT
);
875 ret
= coalesceValues(JOIN_MASK_MOV
);
879 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
881 func
->printLiveIntervals();
884 ret
= allocateConstrainedValues() && linearScan();
889 // TODO: should probably call destructor on LValues later instead
890 for (ArrayList::Iterator it
= func
->allLValues
.iterator();
891 !it
.end(); it
.next())
892 reinterpret_cast<LValue
*>(it
.get())->livei
.clear();
// Program entry point for register allocation (body elided in this extract
// — presumably constructs a RegAlloc and runs it).
897 bool Program::registerAllocation()
// exec(): run the pass over @ir, then insert resolving moves for any
// conflicting constraints. Braces/returns are elided in this extract.
904 RegAlloc::InsertConstraintsPass::exec(Function
*ir
)
908 bool ret
= run(ir
, true, true);
910 ret
= insertConstraintMoves();
914 // TODO: make part of texture insn
// textureMask(): compact a texture instruction's defs to the components
// actually used, rebuilding tex->tex.mask accordingly. Declarations of
// def[]/mask and some bookkeeping lines are elided in this extract.
916 RegAlloc::InsertConstraintsPass::textureMask(TexInstruction
*tex
)
922 for (d
= 0, k
= 0, c
= 0; c
< 4; ++c
) {
923 if (!(tex
->tex
.mask
& (1 << c
)))
// keep only defs that are actually referenced
925 if (tex
->getDef(k
)->refCount()) {
927 def
[d
++] = tex
->getDef(k
);
931 tex
->tex
.mask
= mask
;
933 #if 0 // reorder or set the unused ones NULL ?
934 for (c
= 0; c
< 4; ++c
)
935 if (!(tex
->tex
.mask
& (1 << c
)))
936 def
[d
++] = tex
->getDef(c
);
938 for (c
= 0; c
< d
; ++c
)
939 tex
->setDef(c
, def
[c
]);
// clear remaining def slots
942 tex
->setDef(c
, NULL
);
// detectConflict(): decide whether source @s of constraint op @cst needs
// an extra copy — true when the value participates in multiple constraints,
// appears more than once in this one, or has constrained defs itself.
// Early 'return true' lines are elided in this extract.
947 RegAlloc::InsertConstraintsPass::detectConflict(Instruction
*cst
, int s
)
949 Value
*v
= cst
->getSrc(s
);
951 // current register allocation can't handle it if a value participates in
952 // multiple constraints
953 for (Value::UseIterator it
= v
->uses
.begin(); it
!= v
->uses
.end(); ++it
) {
954 if (cst
!= (*it
)->getInsn())
958 // can start at s + 1 because detectConflict is called on all sources
959 for (int c
= s
+ 1; cst
->srcExists(c
); ++c
)
960 if (v
== cst
->getSrc(c
))
963 Instruction
*defi
= v
->getInsn();
965 return (!defi
|| defi
->constrainedDefs());
// addConstraint(): route sources [s, s+n) of @i through an OP_CONSTRAINT
// whose defs must be allocated contiguously; reuse an existing dominating
// identical constraint when possible. Declarations of cst/d and some
// control-flow lines are elided in this extract.
969 RegAlloc::InsertConstraintsPass::addConstraint(Instruction
*i
, int s
, int n
)
974 // first, look for an existing identical constraint op
975 for (DLList::Iterator it
= constrList
.iterator(); !it
.end(); it
.next()) {
976 cst
= reinterpret_cast<Instruction
*>(it
.get());
// only reuse a constraint whose block dominates i's block
977 if (!i
->bb
->dominatedBy(cst
->bb
))
979 for (d
= 0; d
< n
; ++d
)
980 if (cst
->getSrc(d
) != i
->getSrc(d
+ s
))
// exact match: rewire i's sources to the existing constraint's defs
983 for (d
= 0; d
< n
; ++d
, ++s
)
984 i
->setSrc(s
, cst
->getDef(d
));
// no match: build a fresh constraint instruction
988 cst
= new_Instruction(func
, OP_CONSTRAINT
, i
->dType
);
990 for (d
= 0; d
< n
; ++s
, ++d
) {
991 cst
->setDef(d
, new_LValue(func
, FILE_GPR
));
992 cst
->setSrc(d
, i
->getSrc(s
));
993 i
->setSrc(s
, cst
->getDef(d
));
995 i
->bb
->insertBefore(i
, cst
);
997 constrList
.insert(cst
);
1000 // Add a dummy use of the pointer source of >= 8 byte loads after the load
1001 // to prevent it from being assigned a register which overlapping the load's
1002 // destination, which would produce random corruptions.
1004 RegAlloc::InsertConstraintsPass::addHazard(Instruction
*i
, const ValueRef
*src
)
1006 Instruction
*hzd
= new_Instruction(func
, OP_NOP
, TYPE_NONE
);
1007 hzd
->setSrc(0, src
->get());
1008 i
->bb
->insertAfter(i
, hzd
);
1012 // Insert constraint markers for instructions whose multiple sources must be
1013 // located in consecutive registers.
// Several per-case lines (s/n adjustments, textureMask call, 'next' setup)
// are elided in this extract.
1015 RegAlloc::InsertConstraintsPass::visit(BasicBlock
*bb
)
1017 TexInstruction
*tex
;
1021 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
// texture ops: coordinates (and offsets) must be contiguous
1024 if ((tex
= i
->asTex())) {
1027 // FIXME: this is target specific
1028 if (tex
->op
== OP_TXQ
) {
1029 s
= tex
->srcCount(0xff);
1032 s
= tex
->tex
.target
.getArgCount();
1033 if (!tex
->tex
.target
.isArray() &&
1034 (tex
->tex
.rIndirectSrc
>= 0 || tex
->tex
.sIndirectSrc
>= 0))
1036 if (tex
->op
== OP_TXD
&& tex
->tex
.useOffsets
)
1038 n
= tex
->srcCount(0xff) - s
;
1043 addConstraint(i
, 0, s
);
1045 addConstraint(i
, s
, n
);
// stores/exports: the data operands must be contiguous
1047 if (i
->op
== OP_EXPORT
|| i
->op
== OP_STORE
) {
1048 for (size
= typeSizeof(i
->dType
), s
= 1; size
> 0; ++s
) {
1049 assert(i
->srcExists(s
));
1050 size
-= i
->getSrc(s
)->reg
.size
;
1053 addConstraint(i
, 1, s
- 1);
// wide indirect loads: keep the pointer register out of the destination
1055 if (i
->op
== OP_LOAD
) {
1056 if (i
->src(0).isIndirect(0) && typeSizeof(i
->dType
) >= 8)
1057 addHazard(i
, i
->src(0).getIndirect(0));
1063 // Insert extra moves so that, if multiple register constraints on a value are
1064 // in conflict, these conflicts can be resolved.
1066 RegAlloc::InsertConstraintsPass::insertConstraintMoves()
1068 for (DLList::Iterator it
= constrList
.iterator(); !it
.end(); it
.next()) {
1069 Instruction
*cst
= reinterpret_cast<Instruction
*>(it
.get());
1071 for (int s
= 0; cst
->srcExists(s
); ++s
) {
// only copy sources that actually conflict (continue elided)
1072 if (!detectConflict(cst
, s
))
// MOV src -> fresh GPR, and feed the copy into the constraint instead
1074 Instruction
*mov
= new_Instruction(func
, OP_MOV
,
1075 typeOfSize(cst
->src(s
).getSize()));
1076 mov
->setSrc(0, cst
->getSrc(s
));
1077 mov
->setDef(0, new_LValue(func
, FILE_GPR
));
1078 cst
->setSrc(s
, mov
->getDef(0));
1080 cst
->bb
->insertBefore(cst
, mov
);
1086 } // namespace nv50_ir