2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "nv50_ir_target.h"
31 #define MAX_REGISTER_FILE_SIZE 256
36 RegisterSet(const Target
*);
38 void init(const Target
*);
39 void reset(DataFile
, bool resetMax
= false);
41 void periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
);
42 void intersect(DataFile f
, const RegisterSet
*);
44 bool assign(int32_t& reg
, DataFile f
, unsigned int size
);
45 void release(DataFile f
, int32_t reg
, unsigned int size
);
46 bool occupy(DataFile f
, int32_t reg
, unsigned int size
);
47 bool occupy(const Value
*);
48 void occupyMask(DataFile f
, int32_t reg
, uint8_t mask
);
50 inline int getMaxAssigned(DataFile f
) const { return fill
[f
]; }
52 inline unsigned int getFileSize(DataFile f
, uint8_t regSize
) const
54 if (restrictedGPR16Range
&& f
== FILE_GPR
&& regSize
== 2)
55 return (last
[f
] + 1) / 2;
59 inline unsigned int units(DataFile f
, unsigned int size
) const
61 return size
>> unit
[f
];
63 // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary)
64 inline unsigned int idToBytes(Value
*v
) const
66 return v
->reg
.data
.id
* MIN2(v
->reg
.size
, 4);
68 inline unsigned int idToUnits(Value
*v
) const
70 return units(v
->reg
.file
, idToBytes(v
));
72 inline int bytesToId(Value
*v
, unsigned int bytes
) const
75 return units(v
->reg
.file
, bytes
);
78 inline int unitsToId(DataFile f
, int u
, uint8_t size
) const
82 return (size
< 4) ? u
: ((u
<< unit
[f
]) / 4);
88 BitSet bits
[LAST_REGISTER_FILE
+ 1];
90 int unit
[LAST_REGISTER_FILE
+ 1]; // log2 of allocation granularity
92 int last
[LAST_REGISTER_FILE
+ 1];
93 int fill
[LAST_REGISTER_FILE
+ 1];
95 const bool restrictedGPR16Range
;
99 RegisterSet::reset(DataFile f
, bool resetMax
)
107 RegisterSet::init(const Target
*targ
)
109 for (unsigned int rf
= 0; rf
<= FILE_ADDRESS
; ++rf
) {
110 DataFile f
= static_cast<DataFile
>(rf
);
111 last
[rf
] = targ
->getFileSize(f
) - 1;
112 unit
[rf
] = targ
->getFileUnit(f
);
114 assert(last
[rf
] < MAX_REGISTER_FILE_SIZE
);
115 bits
[rf
].allocate(last
[rf
] + 1, true);
119 RegisterSet::RegisterSet(const Target
*targ
)
120 : restrictedGPR16Range(targ
->getChipset() < 0xc0)
123 for (unsigned int i
= 0; i
<= LAST_REGISTER_FILE
; ++i
)
124 reset(static_cast<DataFile
>(i
));
128 RegisterSet::periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
)
130 bits
[f
].periodicMask32(lock
, unlock
);
134 RegisterSet::intersect(DataFile f
, const RegisterSet
*set
)
136 bits
[f
] |= set
->bits
[f
];
140 RegisterSet::print() const
143 bits
[FILE_GPR
].print();
148 RegisterSet::assign(int32_t& reg
, DataFile f
, unsigned int size
)
150 reg
= bits
[f
].findFreeRange(size
);
153 fill
[f
] = MAX2(fill
[f
], (int32_t)(reg
+ size
- 1));
158 RegisterSet::occupy(const Value
*v
)
160 return occupy(v
->reg
.file
, v
->reg
.data
.id
, v
->reg
.size
>> unit
[v
->reg
.file
]);
164 RegisterSet::occupyMask(DataFile f
, int32_t reg
, uint8_t mask
)
166 bits
[f
].setMask(reg
& ~31, static_cast<uint32_t>(mask
) << (reg
% 32));
170 RegisterSet::occupy(DataFile f
, int32_t reg
, unsigned int size
)
172 if (bits
[f
].testRange(reg
, size
))
175 bits
[f
].setRange(reg
, size
);
177 INFO_DBG(0, REG_ALLOC
, "reg occupy: %u[%i] %u\n", f
, reg
, size
);
179 fill
[f
] = MAX2(fill
[f
], (int32_t)(reg
+ size
- 1));
185 RegisterSet::release(DataFile f
, int32_t reg
, unsigned int size
)
187 bits
[f
].clrRange(reg
, size
);
189 INFO_DBG(0, REG_ALLOC
, "reg release: %u[%i] %u\n", f
, reg
, size
);
195 RegAlloc(Program
*program
) : prog(program
), sequence(0) { }
201 class PhiMovesPass
: public Pass
{
203 virtual bool visit(BasicBlock
*);
204 inline bool needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
);
207 class ArgumentMovesPass
: public Pass
{
209 virtual bool visit(BasicBlock
*);
212 class BuildIntervalsPass
: public Pass
{
214 virtual bool visit(BasicBlock
*);
215 void collectLiveValues(BasicBlock
*);
216 void addLiveRange(Value
*, const BasicBlock
*, int end
);
219 class InsertConstraintsPass
: public Pass
{
221 bool exec(Function
*func
);
223 virtual bool visit(BasicBlock
*);
225 bool insertConstraintMoves();
227 void condenseDefs(Instruction
*);
228 void condenseSrcs(Instruction
*, const int first
, const int last
);
230 void addHazard(Instruction
*i
, const ValueRef
*src
);
231 void textureMask(TexInstruction
*);
232 void addConstraint(Instruction
*, int s
, int n
);
233 bool detectConflict(Instruction
*, int s
);
235 // target specific functions, TODO: put in subclass or Target
236 void texConstraintNV50(TexInstruction
*);
237 void texConstraintNVC0(TexInstruction
*);
238 void texConstraintNVE0(TexInstruction
*);
240 std::list
<Instruction
*> constrList
;
245 bool buildLiveSets(BasicBlock
*);
251 // instructions in control flow / chronological order
254 int sequence
; // for manual passes through CFG
257 typedef std::pair
<Value
*, Value
*> ValuePair
;
259 class SpillCodeInserter
262 SpillCodeInserter(Function
*fn
) : func(fn
), stackSize(0), stackBase(0) { }
264 bool run(const std::list
<ValuePair
>&);
266 Symbol
*assignSlot(const Interval
&, unsigned int size
);
267 inline int32_t getStackSize() const { return stackSize
; }
275 std::list
<Value
*> residents
; // needed to recalculate occup
278 inline uint8_t size() const { return sym
->reg
.size
; }
280 std::list
<SpillSlot
> slots
;
284 LValue
*unspill(Instruction
*usei
, LValue
*, Value
*slot
);
285 void spill(Instruction
*defi
, Value
*slot
, LValue
*);
289 RegAlloc::BuildIntervalsPass::addLiveRange(Value
*val
,
290 const BasicBlock
*bb
,
293 Instruction
*insn
= val
->getUniqueInsn();
296 insn
= bb
->getFirst();
298 assert(bb
->getFirst()->serial
<= bb
->getExit()->serial
);
299 assert(bb
->getExit()->serial
+ 1 >= end
);
301 int begin
= insn
->serial
;
302 if (begin
< bb
->getEntry()->serial
|| begin
> bb
->getExit()->serial
)
303 begin
= bb
->getEntry()->serial
;
305 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "%%%i <- live range [%i(%i), %i)\n",
306 val
->id
, begin
, insn
->serial
, end
);
308 if (begin
!= end
) // empty ranges are only added as hazards for fixed regs
309 val
->livei
.extend(begin
, end
);
313 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
)
315 if (b
->cfg
.incidentCount() <= 1)
319 for (Graph::EdgeIterator ei
= p
->cfg
.outgoing(); !ei
.end(); ei
.next())
320 if (ei
.getType() == Graph::Edge::TREE
||
321 ei
.getType() == Graph::Edge::FORWARD
)
326 // For each operand of each PHI in b, generate a new value by inserting a MOV
327 // at the end of the block it is coming from and replace the operand with its
328 // result. This eliminates liveness conflicts and enables us to let values be
329 // copied to the right register if such a conflict exists nonetheless.
331 // These MOVs are also crucial in making sure the live intervals of phi srces
332 // are extended until the end of the loop, since they are not included in the
335 RegAlloc::PhiMovesPass::visit(BasicBlock
*bb
)
337 Instruction
*phi
, *mov
;
340 std::stack
<BasicBlock
*> stack
;
342 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
343 pb
= BasicBlock::get(ei
.getNode());
345 if (needNewElseBlock(bb
, pb
))
348 while (!stack
.empty()) {
350 pn
= new BasicBlock(func
);
353 pb
->cfg
.detach(&bb
->cfg
);
354 pb
->cfg
.attach(&pn
->cfg
, Graph::Edge::TREE
);
355 pn
->cfg
.attach(&bb
->cfg
, Graph::Edge::FORWARD
);
357 assert(pb
->getExit()->op
!= OP_CALL
);
358 if (pb
->getExit()->asFlow()->target
.bb
== bb
)
359 pb
->getExit()->asFlow()->target
.bb
= pn
;
362 // insert MOVs (phi->src(j) should stem from j-th in-BB)
364 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
365 pb
= BasicBlock::get(ei
.getNode());
366 if (!pb
->isTerminated())
367 pb
->insertTail(new_FlowInstruction(func
, OP_BRA
, bb
));
369 for (phi
= bb
->getPhi(); phi
&& phi
->op
== OP_PHI
; phi
= phi
->next
) {
370 mov
= new_Instruction(func
, OP_MOV
, TYPE_U32
);
372 mov
->setSrc(0, phi
->getSrc(j
));
373 mov
->setDef(0, new_LValue(func
, phi
->getDef(0)->asLValue()));
374 phi
->setSrc(j
, mov
->getDef(0));
376 pb
->insertBefore(pb
->getExit(), mov
);
385 RegAlloc::ArgumentMovesPass::visit(BasicBlock
*bb
)
387 // Bind function call inputs/outputs to the same physical register
388 // the callee uses, inserting moves as appropriate for the case a
390 for (Instruction
*i
= bb
->getEntry(); i
; i
= i
->next
) {
391 FlowInstruction
*cal
= i
->asFlow();
392 if (!cal
|| cal
->op
!= OP_CALL
|| cal
->builtin
)
394 RegisterSet
clobberSet(prog
->getTarget());
396 // Bind input values.
397 for (int s
= 0; cal
->srcExists(s
); ++s
) {
398 LValue
*tmp
= new_LValue(func
, cal
->getSrc(s
)->asLValue());
399 tmp
->reg
.data
.id
= cal
->target
.fn
->ins
[s
].rep()->reg
.data
.id
;
402 new_Instruction(func
, OP_MOV
, typeOfSize(tmp
->reg
.size
));
404 mov
->setSrc(0, cal
->getSrc(s
));
407 bb
->insertBefore(cal
, mov
);
410 // Bind output values.
411 for (int d
= 0; cal
->defExists(d
); ++d
) {
412 LValue
*tmp
= new_LValue(func
, cal
->getDef(d
)->asLValue());
413 tmp
->reg
.data
.id
= cal
->target
.fn
->outs
[d
].rep()->reg
.data
.id
;
416 new_Instruction(func
, OP_MOV
, typeOfSize(tmp
->reg
.size
));
418 mov
->setDef(0, cal
->getDef(d
));
421 bb
->insertAfter(cal
, mov
);
422 clobberSet
.occupy(tmp
);
425 // Bind clobbered values.
426 for (std::deque
<Value
*>::iterator it
= cal
->target
.fn
->clobbers
.begin();
427 it
!= cal
->target
.fn
->clobbers
.end();
429 if (clobberSet
.occupy(*it
)) {
430 Value
*tmp
= new_LValue(func
, (*it
)->asLValue());
431 tmp
->reg
.data
.id
= (*it
)->reg
.data
.id
;
432 cal
->setDef(cal
->defCount(), tmp
);
437 // Update the clobber set of the function.
438 if (BasicBlock::get(func
->cfgExit
) == bb
) {
439 func
->buildDefSets();
440 for (unsigned int i
= 0; i
< bb
->defSet
.getSize(); ++i
)
441 if (bb
->defSet
.test(i
))
442 func
->clobbers
.push_back(func
->getLValue(i
));
448 // Build the set of live-in variables of bb.
450 RegAlloc::buildLiveSets(BasicBlock
*bb
)
452 Function
*f
= bb
->getFunction();
457 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "buildLiveSets(BB:%i)\n", bb
->getId());
459 bb
->liveSet
.allocate(func
->allLValues
.getSize(), false);
462 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
463 bn
= BasicBlock::get(ei
.getNode());
466 if (bn
->cfg
.visit(sequence
))
467 if (!buildLiveSets(bn
))
469 if (n
++ || bb
->liveSet
.marker
)
470 bb
->liveSet
|= bn
->liveSet
;
472 bb
->liveSet
= bn
->liveSet
;
474 if (!n
&& !bb
->liveSet
.marker
)
476 bb
->liveSet
.marker
= true;
478 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
479 INFO("BB:%i live set of out blocks:\n", bb
->getId());
483 // if (!bb->getEntry())
486 if (bb
== BasicBlock::get(f
->cfgExit
)) {
487 for (std::deque
<ValueRef
>::iterator it
= f
->outs
.begin();
488 it
!= f
->outs
.end(); ++it
) {
489 assert(it
->get()->asLValue());
490 bb
->liveSet
.set(it
->get()->id
);
494 for (i
= bb
->getExit(); i
&& i
!= bb
->getEntry()->prev
; i
= i
->prev
) {
495 for (d
= 0; i
->defExists(d
); ++d
)
496 bb
->liveSet
.clr(i
->getDef(d
)->id
);
497 for (s
= 0; i
->srcExists(s
); ++s
)
498 if (i
->getSrc(s
)->asLValue())
499 bb
->liveSet
.set(i
->getSrc(s
)->id
);
501 for (i
= bb
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
)
502 bb
->liveSet
.clr(i
->getDef(0)->id
);
504 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
505 INFO("BB:%i live set after propagation:\n", bb
->getId());
513 RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock
*bb
)
515 BasicBlock
*bbA
= NULL
, *bbB
= NULL
;
517 if (bb
->cfg
.outgoingCount()) {
518 // trickery to save a loop of OR'ing liveSets
519 // aliasing works fine with BitSet::setOr
520 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
521 if (ei
.getType() == Graph::Edge::DUMMY
)
524 bb
->liveSet
.setOr(&bbA
->liveSet
, &bbB
->liveSet
);
529 bbB
= BasicBlock::get(ei
.getNode());
531 bb
->liveSet
.setOr(&bbB
->liveSet
, bbA
? &bbA
->liveSet
: NULL
);
533 if (bb
->cfg
.incidentCount()) {
539 RegAlloc::BuildIntervalsPass::visit(BasicBlock
*bb
)
541 collectLiveValues(bb
);
543 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "BuildIntervals(BB:%i)\n", bb
->getId());
545 // go through out blocks and delete phi sources that do not originate from
546 // the current block from the live set
547 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
548 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
550 for (Instruction
*i
= out
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
) {
551 bb
->liveSet
.clr(i
->getDef(0)->id
);
553 for (int s
= 0; i
->srcExists(s
); ++s
) {
554 assert(i
->src(s
).getInsn());
555 if (i
->getSrc(s
)->getUniqueInsn()->bb
== bb
) // XXX: reachableBy ?
556 bb
->liveSet
.set(i
->getSrc(s
)->id
);
558 bb
->liveSet
.clr(i
->getSrc(s
)->id
);
563 // remaining live-outs are live until end
565 for (unsigned int j
= 0; j
< bb
->liveSet
.getSize(); ++j
)
566 if (bb
->liveSet
.test(j
))
567 addLiveRange(func
->getLValue(j
), bb
, bb
->getExit()->serial
+ 1);
570 for (Instruction
*i
= bb
->getExit(); i
&& i
->op
!= OP_PHI
; i
= i
->prev
) {
571 for (int d
= 0; i
->defExists(d
); ++d
) {
572 bb
->liveSet
.clr(i
->getDef(d
)->id
);
573 if (i
->getDef(d
)->reg
.data
.id
>= 0) // add hazard for fixed regs
574 i
->getDef(d
)->livei
.extend(i
->serial
, i
->serial
);
577 for (int s
= 0; i
->srcExists(s
); ++s
) {
578 if (!i
->getSrc(s
)->asLValue())
580 if (!bb
->liveSet
.test(i
->getSrc(s
)->id
)) {
581 bb
->liveSet
.set(i
->getSrc(s
)->id
);
582 addLiveRange(i
->getSrc(s
), bb
, i
->serial
);
587 if (bb
== BasicBlock::get(func
->cfg
.getRoot())) {
588 for (std::deque
<ValueDef
>::iterator it
= func
->ins
.begin();
589 it
!= func
->ins
.end(); ++it
) {
590 if (it
->get()->reg
.data
.id
>= 0) // add hazard for fixed regs
591 it
->get()->livei
.extend(0, 1);
599 #define JOIN_MASK_PHI (1 << 0)
600 #define JOIN_MASK_UNION (1 << 1)
601 #define JOIN_MASK_MOV (1 << 2)
602 #define JOIN_MASK_TEX (1 << 3)
607 GCRA(Function
*, SpillCodeInserter
&);
610 bool allocateRegisters(ArrayList
& insns
);
612 void printNodeInfo() const;
615 class RIG_Node
: public Graph::Node
620 void init(const RegisterSet
&, LValue
*);
622 void addInterference(RIG_Node
*);
623 void addRegPreference(RIG_Node
*);
625 inline LValue
*getValue() const
627 return reinterpret_cast<LValue
*>(data
);
629 inline void setValue(LValue
*lval
) { data
= lval
; }
631 inline uint8_t getCompMask() const
633 return ((1 << colors
) - 1) << (reg
& 7);
636 static inline RIG_Node
*get(const Graph::EdgeIterator
& ei
)
638 return static_cast<RIG_Node
*>(ei
.getNode());
643 uint16_t degreeLimit
; // if deg < degLimit, node is trivially colourable
651 // list pointers for simplify() phase
655 // union of the live intervals of all coalesced values (we want to retain
656 // the separate intervals for testing interference of compound values)
659 std::list
<RIG_Node
*> prefRegs
;
663 inline RIG_Node
*getNode(const LValue
*v
) const { return &nodes
[v
->id
]; }
665 void buildRIG(ArrayList
&);
666 bool coalesce(ArrayList
&);
667 bool doCoalesce(ArrayList
&, unsigned int mask
);
668 void calculateSpillWeights();
670 bool selectRegisters();
671 void cleanup(const bool success
);
673 void simplifyEdge(RIG_Node
*, RIG_Node
*);
674 void simplifyNode(RIG_Node
*);
676 bool coalesceValues(Value
*, Value
*, bool force
);
677 void resolveSplitsAndMerges();
678 void makeCompound(Instruction
*, bool isSplit
);
680 inline void checkInterference(const RIG_Node
*, Graph::EdgeIterator
&);
682 inline void insertOrderedTail(std::list
<RIG_Node
*>&, RIG_Node
*);
683 void checkList(std::list
<RIG_Node
*>&);
686 std::stack
<uint32_t> stack
;
688 // list headers for simplify() phase
694 unsigned int nodeCount
;
699 static uint8_t relDegree
[17][17];
703 // need to fixup register id for participants of OP_MERGE/SPLIT
704 std::list
<Instruction
*> merges
;
705 std::list
<Instruction
*> splits
;
707 SpillCodeInserter
& spill
;
708 std::list
<ValuePair
> mustSpill
;
711 uint8_t GCRA::relDegree
[17][17];
713 GCRA::RIG_Node::RIG_Node() : Node(NULL
), next(this), prev(this)
719 GCRA::printNodeInfo() const
721 for (unsigned int i
= 0; i
< nodeCount
; ++i
) {
722 if (!nodes
[i
].colors
)
724 INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X",
726 nodes
[i
].f
,nodes
[i
].reg
,nodes
[i
].colors
,
728 nodes
[i
].degree
, nodes
[i
].degreeLimit
);
730 for (Graph::EdgeIterator ei
= nodes
[i
].outgoing(); !ei
.end(); ei
.next())
731 INFO(" %%%i", RIG_Node::get(ei
)->getValue()->id
);
732 for (Graph::EdgeIterator ei
= nodes
[i
].incident(); !ei
.end(); ei
.next())
733 INFO(" %%%i", RIG_Node::get(ei
)->getValue()->id
);
739 GCRA::RIG_Node::init(const RegisterSet
& regs
, LValue
*lval
)
742 if (lval
->reg
.data
.id
>= 0)
743 lval
->noSpill
= lval
->fixedReg
= 1;
745 colors
= regs
.units(lval
->reg
.file
, lval
->reg
.size
);
748 if (lval
->reg
.data
.id
>= 0)
749 reg
= regs
.idToUnits(lval
);
751 weight
= std::numeric_limits
<float>::infinity();
753 degreeLimit
= regs
.getFileSize(f
, lval
->reg
.size
);
755 livei
.insert(lval
->livei
);
759 GCRA::coalesceValues(Value
*dst
, Value
*src
, bool force
)
761 LValue
*rep
= dst
->join
->asLValue();
762 LValue
*val
= src
->join
->asLValue();
764 if (!force
&& val
->reg
.data
.id
>= 0) {
765 rep
= src
->join
->asLValue();
766 val
= dst
->join
->asLValue();
768 RIG_Node
*nRep
= &nodes
[rep
->id
];
769 RIG_Node
*nVal
= &nodes
[val
->id
];
771 if (src
->reg
.file
!= dst
->reg
.file
) {
774 WARN("forced coalescing of values in different files !\n");
776 if (!force
&& dst
->reg
.size
!= src
->reg
.size
)
779 if ((rep
->reg
.data
.id
>= 0) && (rep
->reg
.data
.id
!= val
->reg
.data
.id
)) {
781 if (val
->reg
.data
.id
>= 0)
782 WARN("forced coalescing of values in different fixed regs !\n");
784 if (val
->reg
.data
.id
>= 0)
786 // make sure that there is no overlap with the fixed register of rep
787 for (ArrayList::Iterator it
= func
->allLValues
.iterator();
788 !it
.end(); it
.next()) {
789 Value
*reg
= reinterpret_cast<Value
*>(it
.get())->asLValue();
791 if (reg
->interfers(rep
) && reg
->livei
.overlaps(nVal
->livei
))
797 if (!force
&& nRep
->livei
.overlaps(nVal
->livei
))
800 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "joining %%%i($%i) <- %%%i\n",
801 rep
->id
, rep
->reg
.data
.id
, val
->id
);
803 // set join pointer of all values joined with val
804 for (Value::DefIterator def
= val
->defs
.begin(); def
!= val
->defs
.end();
806 (*def
)->get()->join
= rep
;
807 assert(rep
->join
== rep
&& val
->join
== rep
);
809 // add val's definitions to rep and extend the live interval of its RIG node
810 rep
->defs
.insert(rep
->defs
.end(), val
->defs
.begin(), val
->defs
.end());
811 nRep
->livei
.unify(nVal
->livei
);
816 GCRA::coalesce(ArrayList
& insns
)
818 bool ret
= doCoalesce(insns
, JOIN_MASK_PHI
);
821 switch (func
->getProgram()->getTarget()->getChipset() & ~0xf) {
826 ret
= doCoalesce(insns
, JOIN_MASK_UNION
| JOIN_MASK_TEX
);
831 ret
= doCoalesce(insns
, JOIN_MASK_UNION
);
838 return doCoalesce(insns
, JOIN_MASK_MOV
);
841 static inline uint8_t makeCompMask(int compSize
, int base
, int size
)
843 uint8_t m
= ((1 << size
) - 1) << base
;
855 assert(compSize
<= 8);
860 static inline void copyCompound(Value
*dst
, Value
*src
)
862 LValue
*ldst
= dst
->asLValue();
863 LValue
*lsrc
= src
->asLValue();
865 ldst
->compound
= lsrc
->compound
;
866 ldst
->compMask
= lsrc
->compMask
;
870 GCRA::makeCompound(Instruction
*insn
, bool split
)
872 LValue
*rep
= (split
? insn
->getSrc(0) : insn
->getDef(0))->asLValue();
874 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
875 INFO("makeCompound(split = %i): ", split
);
879 const unsigned int size
= getNode(rep
)->colors
;
880 unsigned int base
= 0;
883 rep
->compMask
= 0xff;
886 for (int c
= 0; split
? insn
->defExists(c
) : insn
->srcExists(c
); ++c
) {
887 LValue
*val
= (split
? insn
->getDef(c
) : insn
->getSrc(c
))->asLValue();
891 val
->compMask
= 0xff;
892 val
->compMask
&= makeCompMask(size
, base
, getNode(val
)->colors
);
893 assert(val
->compMask
);
895 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "compound: %%%i:%02x <- %%%i:%02x\n",
896 rep
->id
, rep
->compMask
, val
->id
, val
->compMask
);
898 base
+= getNode(val
)->colors
;
900 assert(base
== size
);
904 GCRA::doCoalesce(ArrayList
& insns
, unsigned int mask
)
908 for (n
= 0; n
< insns
.getSize(); ++n
) {
910 Instruction
*insn
= reinterpret_cast<Instruction
*>(insns
.get(n
));
914 if (!(mask
& JOIN_MASK_PHI
))
916 for (c
= 0; insn
->srcExists(c
); ++c
)
917 if (!coalesceValues(insn
->getDef(0), insn
->getSrc(c
), false)) {
919 ERROR("failed to coalesce phi operands\n");
925 if (!(mask
& JOIN_MASK_UNION
))
927 for (c
= 0; insn
->srcExists(c
); ++c
)
928 coalesceValues(insn
->getDef(0), insn
->getSrc(c
), true);
929 if (insn
->op
== OP_MERGE
) {
930 merges
.push_back(insn
);
931 if (insn
->srcExists(1))
932 makeCompound(insn
, false);
936 if (!(mask
& JOIN_MASK_UNION
))
938 splits
.push_back(insn
);
939 for (c
= 0; insn
->defExists(c
); ++c
)
940 coalesceValues(insn
->getSrc(0), insn
->getDef(c
), true);
941 makeCompound(insn
, true);
944 if (!(mask
& JOIN_MASK_MOV
))
947 if (!insn
->getDef(0)->uses
.empty())
948 i
= insn
->getDef(0)->uses
.front()->getInsn();
949 // if this is a contraint-move there will only be a single use
950 if (i
&& i
->op
== OP_MERGE
) // do we really still need this ?
952 i
= insn
->getSrc(0)->getUniqueInsn();
953 if (i
&& !i
->constrainedDefs()) {
954 if (coalesceValues(insn
->getDef(0), insn
->getSrc(0), false))
955 copyCompound(insn
->getSrc(0), insn
->getDef(0));
966 if (!(mask
& JOIN_MASK_TEX
))
968 for (c
= 0; insn
->srcExists(c
) && c
!= insn
->predSrc
; ++c
)
969 coalesceValues(insn
->getDef(c
), insn
->getSrc(c
), true);
979 GCRA::RIG_Node::addInterference(RIG_Node
*node
)
981 this->degree
+= relDegree
[node
->colors
][colors
];
982 node
->degree
+= relDegree
[colors
][node
->colors
];
984 this->attach(node
, Graph::Edge::CROSS
);
988 GCRA::RIG_Node::addRegPreference(RIG_Node
*node
)
990 prefRegs
.push_back(node
);
993 GCRA::GCRA(Function
*fn
, SpillCodeInserter
& spill
) :
995 regs(fn
->getProgram()->getTarget()),
998 prog
= func
->getProgram();
1000 // initialize relative degrees array - i takes away from j
1001 for (int i
= 1; i
<= 16; ++i
)
1002 for (int j
= 1; j
<= 16; ++j
)
1003 relDegree
[i
][j
] = j
* ((i
+ j
- 1) / j
);
1013 GCRA::checkList(std::list
<RIG_Node
*>& lst
)
1015 GCRA::RIG_Node
*prev
= NULL
;
1017 for (std::list
<RIG_Node
*>::iterator it
= lst
.begin();
1020 assert((*it
)->getValue()->join
== (*it
)->getValue());
1022 assert(prev
->livei
.begin() <= (*it
)->livei
.begin());
1028 GCRA::insertOrderedTail(std::list
<RIG_Node
*>& list
, RIG_Node
*node
)
1030 if (node
->livei
.isEmpty())
1032 // only the intervals of joined values don't necessarily arrive in order
1033 std::list
<RIG_Node
*>::iterator prev
, it
;
1034 for (it
= list
.end(); it
!= list
.begin(); it
= prev
) {
1037 if ((*prev
)->livei
.begin() <= node
->livei
.begin())
1040 list
.insert(it
, node
);
1044 GCRA::buildRIG(ArrayList
& insns
)
1046 std::list
<RIG_Node
*> values
, active
;
1048 for (std::deque
<ValueDef
>::iterator it
= func
->ins
.begin();
1049 it
!= func
->ins
.end(); ++it
)
1050 insertOrderedTail(values
, getNode(it
->get()->asLValue()));
1052 for (int i
= 0; i
< insns
.getSize(); ++i
) {
1053 Instruction
*insn
= reinterpret_cast<Instruction
*>(insns
.get(i
));
1054 for (int d
= 0; insn
->defExists(d
); ++d
)
1055 if (insn
->getDef(d
)->rep() == insn
->getDef(d
))
1056 insertOrderedTail(values
, getNode(insn
->getDef(d
)->asLValue()));
1060 while (!values
.empty()) {
1061 RIG_Node
*cur
= values
.front();
1063 for (std::list
<RIG_Node
*>::iterator it
= active
.begin();
1066 RIG_Node
*node
= *it
;
1068 if (node
->livei
.end() <= cur
->livei
.begin()) {
1069 it
= active
.erase(it
);
1072 if (node
->f
== cur
->f
&& node
->livei
.overlaps(cur
->livei
)) {
1073 cur
->addInterference(node
);
1077 active
.push_back(cur
);
1082 GCRA::calculateSpillWeights()
1084 for (unsigned int i
= 0; i
< nodeCount
; ++i
) {
1085 RIG_Node
*const n
= &nodes
[i
];
1086 if (!nodes
[i
].colors
|| nodes
[i
].livei
.isEmpty())
1088 if (nodes
[i
].reg
>= 0) {
1090 regs
.occupy(n
->f
, n
->reg
, n
->colors
);
1093 LValue
*val
= nodes
[i
].getValue();
1095 if (!val
->noSpill
) {
1097 for (Value::DefIterator it
= val
->defs
.begin();
1098 it
!= val
->defs
.end();
1100 rc
+= (*it
)->get()->refCount();
1103 (float)rc
* (float)rc
/ (float)nodes
[i
].livei
.extent();
1106 if (nodes
[i
].degree
< nodes
[i
].degreeLimit
) {
1108 if (val
->reg
.size
> 4)
1110 DLLIST_ADDHEAD(&lo
[l
], &nodes
[i
]);
1112 DLLIST_ADDHEAD(&hi
, &nodes
[i
]);
1115 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
)
1120 GCRA::simplifyEdge(RIG_Node
*a
, RIG_Node
*b
)
1122 bool move
= b
->degree
>= b
->degreeLimit
;
1124 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
,
1125 "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n",
1126 a
->getValue()->id
, a
->degree
, a
->degreeLimit
,
1127 b
->getValue()->id
, b
->degree
, b
->degreeLimit
);
1129 b
->degree
-= relDegree
[a
->colors
][b
->colors
];
1131 move
= move
&& b
->degree
< b
->degreeLimit
;
1132 if (move
&& !DLLIST_EMPTY(b
)) {
1133 int l
= (b
->getValue()->reg
.size
> 4) ? 1 : 0;
1135 DLLIST_ADDTAIL(&lo
[l
], b
);
1140 GCRA::simplifyNode(RIG_Node
*node
)
1142 for (Graph::EdgeIterator ei
= node
->outgoing(); !ei
.end(); ei
.next())
1143 simplifyEdge(node
, RIG_Node::get(ei
));
1145 for (Graph::EdgeIterator ei
= node
->incident(); !ei
.end(); ei
.next())
1146 simplifyEdge(node
, RIG_Node::get(ei
));
1149 stack
.push(node
->getValue()->id
);
1151 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "SIMPLIFY: pushed %%%i%s\n",
1152 node
->getValue()->id
,
1153 (node
->degree
< node
->degreeLimit
) ? "" : "(spill)");
1160 if (!DLLIST_EMPTY(&lo
[0])) {
1162 simplifyNode(lo
[0].next
);
1163 } while (!DLLIST_EMPTY(&lo
[0]));
1165 if (!DLLIST_EMPTY(&lo
[1])) {
1166 simplifyNode(lo
[1].next
);
1168 if (!DLLIST_EMPTY(&hi
)) {
1169 RIG_Node
*best
= hi
.next
;
1170 float bestScore
= best
->weight
/ (float)best
->degree
;
1172 for (RIG_Node
*it
= best
->next
; it
!= &hi
; it
= it
->next
) {
1173 float score
= it
->weight
/ (float)it
->degree
;
1174 if (score
< bestScore
) {
1179 if (isinf(bestScore
)) {
1180 ERROR("no viable spill candidates left\n");
1191 GCRA::checkInterference(const RIG_Node
*node
, Graph::EdgeIterator
& ei
)
1193 const RIG_Node
*intf
= RIG_Node::get(ei
);
1197 const LValue
*vA
= node
->getValue();
1198 const LValue
*vB
= intf
->getValue();
1200 const uint8_t intfMask
= ((1 << intf
->colors
) - 1) << (intf
->reg
& 7);
1202 if (vA
->compound
| vB
->compound
) {
1203 // NOTE: this only works for >aligned< register tuples !
1204 for (Value::DefCIterator D
= vA
->defs
.begin(); D
!= vA
->defs
.end(); ++D
) {
1205 for (Value::DefCIterator d
= vB
->defs
.begin(); d
!= vB
->defs
.end(); ++d
) {
1206 const LValue
*vD
= (*D
)->get()->asLValue();
1207 const LValue
*vd
= (*d
)->get()->asLValue();
1209 if (!vD
->livei
.overlaps(vd
->livei
)) {
1210 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "(%%%i) X (%%%i): no overlap\n",
1215 uint8_t mask
= vD
->compound
? vD
->compMask
: ~0;
1217 assert(vB
->compound
);
1218 mask
&= vd
->compMask
& vB
->compMask
;
1223 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
,
1224 "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n",
1226 vD
->compound
? vD
->compMask
: 0xff,
1228 vd
->compound
? vd
->compMask
: intfMask
,
1229 vB
->compMask
, intf
->reg
& ~7, mask
);
1231 regs
.occupyMask(node
->f
, intf
->reg
& ~7, mask
);
1235 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
,
1236 "(%%%i) X (%%%i): $r%i + %u\n",
1237 vA
->id
, vB
->id
, intf
->reg
, intf
->colors
);
1238 regs
.occupy(node
->f
, intf
->reg
, intf
->colors
);
1243 GCRA::selectRegisters()
1245 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "\nSELECT phase\n");
1247 while (!stack
.empty()) {
1248 RIG_Node
*node
= &nodes
[stack
.top()];
1251 regs
.reset(node
->f
);
1253 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "\nNODE[%%%i, %u colors]\n",
1254 node
->getValue()->id
, node
->colors
);
1256 for (Graph::EdgeIterator ei
= node
->outgoing(); !ei
.end(); ei
.next())
1257 checkInterference(node
, ei
);
1258 for (Graph::EdgeIterator ei
= node
->incident(); !ei
.end(); ei
.next())
1259 checkInterference(node
, ei
);
1261 if (!node
->prefRegs
.empty()) {
1262 for (std::list
<RIG_Node
*>::const_iterator it
= node
->prefRegs
.begin();
1263 it
!= node
->prefRegs
.end();
1265 if ((*it
)->reg
>= 0 &&
1266 regs
.occupy(node
->f
, (*it
)->reg
, node
->colors
)) {
1267 node
->reg
= (*it
)->reg
;
1274 LValue
*lval
= node
->getValue();
1275 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
)
1277 bool ret
= regs
.assign(node
->reg
, node
->f
, node
->colors
);
1279 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "assigned reg %i\n", node
->reg
);
1280 lval
->compMask
= node
->getCompMask();
1282 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "must spill: %%%i (size %u)\n",
1283 lval
->id
, lval
->reg
.size
);
1284 Symbol
*slot
= NULL
;
1285 if (lval
->reg
.file
== FILE_GPR
)
1286 slot
= spill
.assignSlot(node
->livei
, lval
->reg
.size
);
1287 mustSpill
.push_back(ValuePair(lval
, slot
));
1290 if (!mustSpill
.empty())
1292 for (unsigned int i
= 0; i
< nodeCount
; ++i
) {
1293 LValue
*lval
= nodes
[i
].getValue();
1294 if (nodes
[i
].reg
>= 0 && nodes
[i
].colors
> 0)
1296 regs
.unitsToId(nodes
[i
].f
, nodes
[i
].reg
, lval
->reg
.size
);
1302 GCRA::allocateRegisters(ArrayList
& insns
)
1306 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
,
1307 "allocateRegisters to %u instructions\n", insns
.getSize());
1309 nodeCount
= func
->allLValues
.getSize();
1310 nodes
= new RIG_Node
[nodeCount
];
1313 for (unsigned int i
= 0; i
< nodeCount
; ++i
) {
1314 LValue
*lval
= reinterpret_cast<LValue
*>(func
->allLValues
.get(i
));
1316 nodes
[i
].init(regs
, lval
);
1317 RIG
.insert(&nodes
[i
]);
1321 // coalesce first, we use only 1 RIG node for a group of joined values
1322 ret
= coalesce(insns
);
1326 if (func
->getProgram()->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
)
1327 func
->printLiveIntervals();
1330 calculateSpillWeights();
1333 ret
= selectRegisters();
1335 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
,
1336 "selectRegisters failed, inserting spill code ...\n");
1337 regs
.reset(FILE_GPR
, true);
1338 spill
.run(mustSpill
);
1339 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
)
1342 prog
->maxGPR
= regs
.getMaxAssigned(FILE_GPR
);
1351 GCRA::cleanup(const bool success
)
1355 for (ArrayList::Iterator it
= func
->allLValues
.iterator();
1356 !it
.end(); it
.next()) {
1357 LValue
*lval
= reinterpret_cast<LValue
*>(it
.get());
1359 lval
->livei
.clear();
1364 if (lval
->join
== lval
)
1368 lval
->reg
.data
.id
= lval
->join
->reg
.data
.id
;
1370 for (Value::DefIterator d
= lval
->defs
.begin(); d
!= lval
->defs
.end();
1372 lval
->join
->defs
.remove(*d
);
1378 resolveSplitsAndMerges();
1379 splits
.clear(); // avoid duplicate entries on next coalesce pass
1387 SpillCodeInserter::assignSlot(const Interval
&livei
, unsigned int size
)
1390 int32_t offsetBase
= stackSize
;
1392 std::list
<SpillSlot
>::iterator pos
= slots
.end(), it
= slots
.begin();
1394 if (offsetBase
% size
)
1395 offsetBase
+= size
- (offsetBase
% size
);
1399 for (offset
= offsetBase
; offset
< stackSize
; offset
+= size
) {
1400 while (it
!= slots
.end() && it
->offset
< offset
)
1402 if (it
== slots
.end()) // no slots left
1404 std::list
<SpillSlot
>::iterator bgn
= it
;
1406 while (it
!= slots
.end() && it
->offset
< (offset
+ size
)) {
1408 if (it
->occup
.overlaps(livei
))
1412 if (it
== slots
.end() || it
->offset
>= (offset
+ size
)) {
1414 for (; bgn
!= slots
.end() && bgn
->offset
< (offset
+ size
); ++bgn
) {
1415 bgn
->occup
.insert(livei
);
1416 if (bgn
->size() == size
)
1417 slot
.sym
= bgn
->sym
;
1423 stackSize
= offset
+ size
;
1424 slot
.offset
= offset
;
1425 slot
.sym
= new_Symbol(func
->getProgram(), FILE_MEMORY_LOCAL
);
1426 if (!func
->stackPtr
)
1427 offset
+= func
->tlsBase
;
1428 slot
.sym
->setAddress(NULL
, offset
);
1429 slot
.sym
->reg
.size
= size
;
1430 slots
.insert(pos
, slot
)->occup
.insert(livei
);
1436 SpillCodeInserter::spill(Instruction
*defi
, Value
*slot
, LValue
*lval
)
1438 const DataType ty
= typeOfSize(slot
->reg
.size
);
1441 if (slot
->reg
.file
== FILE_MEMORY_LOCAL
) {
1442 st
= new_Instruction(func
, OP_STORE
, ty
);
1443 st
->setSrc(0, slot
);
1444 st
->setSrc(1, lval
);
1447 st
= new_Instruction(func
, OP_CVT
, ty
);
1448 st
->setDef(0, slot
);
1449 st
->setSrc(0, lval
);
1451 defi
->bb
->insertAfter(defi
, st
);
1455 SpillCodeInserter::unspill(Instruction
*usei
, LValue
*lval
, Value
*slot
)
1457 const DataType ty
= typeOfSize(slot
->reg
.size
);
1459 lval
= cloneShallow(func
, lval
);
1462 if (slot
->reg
.file
== FILE_MEMORY_LOCAL
) {
1464 ld
= new_Instruction(func
, OP_LOAD
, ty
);
1466 ld
= new_Instruction(func
, OP_CVT
, ty
);
1468 ld
->setDef(0, lval
);
1469 ld
->setSrc(0, slot
);
1471 usei
->bb
->insertBefore(usei
, ld
);
1476 SpillCodeInserter::run(const std::list
<ValuePair
>& lst
)
1478 for (std::list
<ValuePair
>::const_iterator it
= lst
.begin(); it
!= lst
.end();
1480 LValue
*lval
= it
->first
->asLValue();
1481 Symbol
*mem
= it
->second
? it
->second
->asSym() : NULL
;
1483 for (Value::DefIterator d
= lval
->defs
.begin(); d
!= lval
->defs
.end();
1486 static_cast<Value
*>(mem
) : new_LValue(func
, FILE_GPR
);
1488 Instruction
*last
= NULL
;
1490 LValue
*dval
= (*d
)->get()->asLValue();
1491 Instruction
*defi
= (*d
)->getInsn();
1493 // handle uses first or they'll contain the spill stores
1494 while (!dval
->uses
.empty()) {
1495 ValueRef
*u
= dval
->uses
.front();
1496 Instruction
*usei
= u
->getInsn();
1498 if (usei
->op
== OP_PHI
) {
1499 tmp
= (slot
->reg
.file
== FILE_MEMORY_LOCAL
) ? NULL
: slot
;
1502 if (!last
|| usei
!= last
->next
) { // TODO: sort uses
1503 tmp
= unspill(usei
, dval
, slot
);
1510 if (defi
->op
== OP_PHI
) {
1511 d
= lval
->defs
.erase(d
);
1513 if (slot
->reg
.file
== FILE_MEMORY_LOCAL
)
1514 delete_Instruction(func
->getProgram(), defi
);
1516 defi
->setDef(0, slot
);
1518 spill(defi
, slot
, dval
);
1524 // TODO: We're not trying to reuse old slots in a potential next iteration.
1525 // We have to update the slots' livei intervals to be able to do that.
1526 stackBase
= stackSize
;
1534 for (IteratorRef it
= prog
->calls
.iteratorDFS(false);
1535 !it
->end(); it
->next()) {
1536 func
= Function::get(reinterpret_cast<Graph::Node
*>(it
->get()));
1538 func
->tlsBase
= prog
->tlsSize
;
1541 prog
->tlsSize
+= func
->tlsSize
;
1547 RegAlloc::execFunc()
1549 InsertConstraintsPass insertConstr
;
1550 PhiMovesPass insertPhiMoves
;
1551 ArgumentMovesPass insertArgMoves
;
1552 BuildIntervalsPass buildIntervals
;
1553 SpillCodeInserter
insertSpills(func
);
1555 GCRA
gcra(func
, insertSpills
);
1557 unsigned int i
, retries
;
1560 ret
= insertConstr
.exec(func
);
1564 ret
= insertPhiMoves
.run(func
);
1568 ret
= insertArgMoves
.run(func
);
1572 // TODO: need to fix up spill slot usage ranges to support > 1 retry
1573 for (retries
= 0; retries
< 3; ++retries
) {
1574 if (retries
&& (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
))
1575 INFO("Retry: %i\n", retries
);
1576 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
)
1579 // spilling to registers may add live ranges, need to rebuild everything
1581 for (sequence
= func
->cfg
.nextSequence(), i
= 0;
1582 ret
&& i
<= func
->loopNestingBound
;
1583 sequence
= func
->cfg
.nextSequence(), ++i
)
1584 ret
= buildLiveSets(BasicBlock::get(func
->cfg
.getRoot()));
1587 func
->orderInstructions(this->insns
);
1589 ret
= buildIntervals
.run(func
);
1592 ret
= gcra
.allocateRegisters(insns
);
1596 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "RegAlloc done: %i\n", ret
);
1598 func
->tlsSize
= insertSpills
.getStackSize();
1603 // TODO: check if modifying Instruction::join here breaks anything
1605 GCRA::resolveSplitsAndMerges()
1607 for (std::list
<Instruction
*>::iterator it
= splits
.begin();
1610 Instruction
*split
= *it
;
1611 unsigned int reg
= regs
.idToBytes(split
->getSrc(0));
1612 for (int d
= 0; split
->defExists(d
); ++d
) {
1613 Value
*v
= split
->getDef(d
);
1614 v
->reg
.data
.id
= regs
.bytesToId(v
, reg
);
1621 for (std::list
<Instruction
*>::iterator it
= merges
.begin();
1624 Instruction
*merge
= *it
;
1625 unsigned int reg
= regs
.idToBytes(merge
->getDef(0));
1626 for (int s
= 0; merge
->srcExists(s
); ++s
) {
1627 Value
*v
= merge
->getSrc(s
);
1628 v
->reg
.data
.id
= regs
.bytesToId(v
, reg
);
1636 bool Program::registerAllocation()
1643 RegAlloc::InsertConstraintsPass::exec(Function
*ir
)
1647 bool ret
= run(ir
, true, true);
1649 ret
= insertConstraintMoves();
1653 // TODO: make part of texture insn
1655 RegAlloc::InsertConstraintsPass::textureMask(TexInstruction
*tex
)
1661 for (d
= 0, k
= 0, c
= 0; c
< 4; ++c
) {
1662 if (!(tex
->tex
.mask
& (1 << c
)))
1664 if (tex
->getDef(k
)->refCount()) {
1666 def
[d
++] = tex
->getDef(k
);
1670 tex
->tex
.mask
= mask
;
1672 for (c
= 0; c
< d
; ++c
)
1673 tex
->setDef(c
, def
[c
]);
1675 tex
->setDef(c
, NULL
);
1679 RegAlloc::InsertConstraintsPass::detectConflict(Instruction
*cst
, int s
)
1681 Value
*v
= cst
->getSrc(s
);
1683 // current register allocation can't handle it if a value participates in
1684 // multiple constraints
1685 for (Value::UseIterator it
= v
->uses
.begin(); it
!= v
->uses
.end(); ++it
) {
1686 if (cst
!= (*it
)->getInsn())
1690 // can start at s + 1 because detectConflict is called on all sources
1691 for (int c
= s
+ 1; cst
->srcExists(c
); ++c
)
1692 if (v
== cst
->getSrc(c
))
1695 Instruction
*defi
= v
->getInsn();
1697 return (!defi
|| defi
->constrainedDefs());
1701 RegAlloc::InsertConstraintsPass::addConstraint(Instruction
*i
, int s
, int n
)
1706 // first, look for an existing identical constraint op
1707 for (std::list
<Instruction
*>::iterator it
= constrList
.begin();
1708 it
!= constrList
.end();
1711 if (!i
->bb
->dominatedBy(cst
->bb
))
1713 for (d
= 0; d
< n
; ++d
)
1714 if (cst
->getSrc(d
) != i
->getSrc(d
+ s
))
1717 for (d
= 0; d
< n
; ++d
, ++s
)
1718 i
->setSrc(s
, cst
->getDef(d
));
1722 cst
= new_Instruction(func
, OP_CONSTRAINT
, i
->dType
);
1724 for (d
= 0; d
< n
; ++s
, ++d
) {
1725 cst
->setDef(d
, new_LValue(func
, FILE_GPR
));
1726 cst
->setSrc(d
, i
->getSrc(s
));
1727 i
->setSrc(s
, cst
->getDef(d
));
1729 i
->bb
->insertBefore(i
, cst
);
1731 constrList
.push_back(cst
);
1734 // Add a dummy use of the pointer source of >= 8 byte loads after the load
1735 // to prevent it from being assigned a register which overlapping the load's
1736 // destination, which would produce random corruptions.
1738 RegAlloc::InsertConstraintsPass::addHazard(Instruction
*i
, const ValueRef
*src
)
1740 Instruction
*hzd
= new_Instruction(func
, OP_NOP
, TYPE_NONE
);
1741 hzd
->setSrc(0, src
->get());
1742 i
->bb
->insertAfter(i
, hzd
);
1746 // b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q
1748 RegAlloc::InsertConstraintsPass::condenseDefs(Instruction
*insn
)
1752 for (n
= 0; insn
->defExists(n
) && insn
->def(n
).getFile() == FILE_GPR
; ++n
)
1753 size
+= insn
->getDef(n
)->reg
.size
;
1756 LValue
*lval
= new_LValue(func
, FILE_GPR
);
1757 lval
->reg
.size
= size
;
1759 Instruction
*split
= new_Instruction(func
, OP_SPLIT
, typeOfSize(size
));
1760 split
->setSrc(0, lval
);
1761 for (int d
= 0; d
< n
; ++d
) {
1762 split
->setDef(d
, insn
->getDef(d
));
1763 insn
->setDef(d
, NULL
);
1765 insn
->setDef(0, lval
);
1767 for (int k
= 1, d
= n
; insn
->defExists(d
); ++d
, ++k
) {
1768 insn
->setDef(k
, insn
->getDef(d
));
1769 insn
->setDef(d
, NULL
);
1771 // carry over predicate if any (mainly for OP_UNION uses)
1772 split
->setPredicate(insn
->cc
, insn
->getPredicate());
1774 insn
->bb
->insertAfter(insn
, split
);
1775 constrList
.push_back(split
);
1778 RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction
*insn
,
1779 const int a
, const int b
)
1784 for (int s
= a
; s
<= b
; ++s
)
1785 size
+= insn
->getSrc(s
)->reg
.size
;
1788 LValue
*lval
= new_LValue(func
, FILE_GPR
);
1789 lval
->reg
.size
= size
;
1792 insn
->takeExtraSources(0, save
);
1794 Instruction
*merge
= new_Instruction(func
, OP_MERGE
, typeOfSize(size
));
1795 merge
->setDef(0, lval
);
1796 for (int s
= a
, i
= 0; s
<= b
; ++s
, ++i
) {
1797 merge
->setSrc(i
, insn
->getSrc(s
));
1798 insn
->setSrc(s
, NULL
);
1800 insn
->setSrc(a
, lval
);
1802 for (int k
= a
+ 1, s
= b
+ 1; insn
->srcExists(s
); ++s
, ++k
) {
1803 insn
->setSrc(k
, insn
->getSrc(s
));
1804 insn
->setSrc(s
, NULL
);
1806 insn
->bb
->insertBefore(insn
, merge
);
1808 insn
->putExtraSources(0, save
);
1810 constrList
.push_back(merge
);
1814 RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction
*tex
)
1819 int n
= tex
->srcCount(0xff, true);
1821 condenseSrcs(tex
, 0, 3);
1822 if (n
> 5) // NOTE: first call modified positions already
1823 condenseSrcs(tex
, 4 - (4 - 1), n
- 1 - (4 - 1));
1826 condenseSrcs(tex
, 0, n
- 1);
1831 RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction
*tex
)
1837 if (tex
->op
== OP_TXQ
) {
1838 s
= tex
->srcCount(0xff);
1841 s
= tex
->tex
.target
.getArgCount();
1842 if (!tex
->tex
.target
.isArray() &&
1843 (tex
->tex
.rIndirectSrc
>= 0 || tex
->tex
.sIndirectSrc
>= 0))
1845 if (tex
->op
== OP_TXD
&& tex
->tex
.useOffsets
)
1847 n
= tex
->srcCount(0xff) - s
;
1852 condenseSrcs(tex
, 0, s
- 1);
1853 if (n
> 1) // NOTE: first call modified positions already
1854 condenseSrcs(tex
, 1, n
);
1860 RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction
*tex
)
1862 Value
*pred
= tex
->getPredicate();
1864 tex
->setPredicate(tex
->cc
, NULL
);
1868 assert(tex
->defExists(0) && tex
->srcExists(0));
1869 // make src and def count match
1871 for (c
= 0; tex
->srcExists(c
) || tex
->defExists(c
); ++c
) {
1872 if (!tex
->srcExists(c
))
1873 tex
->setSrc(c
, new_LValue(func
, tex
->getSrc(0)->asLValue()));
1874 if (!tex
->defExists(c
))
1875 tex
->setDef(c
, new_LValue(func
, tex
->getDef(0)->asLValue()));
1878 tex
->setPredicate(tex
->cc
, pred
);
1880 condenseSrcs(tex
, 0, c
- 1);
1883 // Insert constraint markers for instructions whose multiple sources must be
1884 // located in consecutive registers.
1886 RegAlloc::InsertConstraintsPass::visit(BasicBlock
*bb
)
1888 TexInstruction
*tex
;
1892 targ
= bb
->getProgram()->getTarget();
1894 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
1897 if ((tex
= i
->asTex())) {
1898 switch (targ
->getChipset() & ~0xf) {
1903 texConstraintNV50(tex
);
1907 texConstraintNVC0(tex
);
1910 case NVISA_GK110_CHIPSET
:
1911 texConstraintNVE0(tex
);
1917 if (i
->op
== OP_EXPORT
|| i
->op
== OP_STORE
) {
1918 for (size
= typeSizeof(i
->dType
), s
= 1; size
> 0; ++s
) {
1919 assert(i
->srcExists(s
));
1920 size
-= i
->getSrc(s
)->reg
.size
;
1922 condenseSrcs(i
, 1, s
- 1);
1924 if (i
->op
== OP_LOAD
|| i
->op
== OP_VFETCH
) {
1926 if (i
->src(0).isIndirect(0) && typeSizeof(i
->dType
) >= 8)
1927 addHazard(i
, i
->src(0).getIndirect(0));
1929 if (i
->op
== OP_UNION
) {
1930 constrList
.push_back(i
);
1936 // Insert extra moves so that, if multiple register constraints on a value are
1937 // in conflict, these conflicts can be resolved.
1939 RegAlloc::InsertConstraintsPass::insertConstraintMoves()
1941 for (std::list
<Instruction
*>::iterator it
= constrList
.begin();
1942 it
!= constrList
.end();
1944 Instruction
*cst
= *it
;
1947 if (cst
->op
== OP_SPLIT
&& 0) {
1948 // spilling splits is annoying, just make sure they're separate
1949 for (int d
= 0; cst
->defExists(d
); ++d
) {
1950 if (!cst
->getDef(d
)->refCount())
1952 LValue
*lval
= new_LValue(func
, cst
->def(d
).getFile());
1953 const uint8_t size
= cst
->def(d
).getSize();
1954 lval
->reg
.size
= size
;
1956 mov
= new_Instruction(func
, OP_MOV
, typeOfSize(size
));
1957 mov
->setSrc(0, lval
);
1958 mov
->setDef(0, cst
->getDef(d
));
1959 cst
->setDef(d
, mov
->getSrc(0));
1960 cst
->bb
->insertAfter(cst
, mov
);
1962 cst
->getSrc(0)->asLValue()->noSpill
= 1;
1963 mov
->getSrc(0)->asLValue()->noSpill
= 1;
1966 if (cst
->op
== OP_MERGE
|| cst
->op
== OP_UNION
) {
1967 for (int s
= 0; cst
->srcExists(s
); ++s
) {
1968 const uint8_t size
= cst
->src(s
).getSize();
1970 if (!cst
->getSrc(s
)->defs
.size()) {
1971 mov
= new_Instruction(func
, OP_NOP
, typeOfSize(size
));
1972 mov
->setDef(0, cst
->getSrc(s
));
1973 cst
->bb
->insertBefore(cst
, mov
);
1976 assert(cst
->getSrc(s
)->defs
.size() == 1); // still SSA
1978 Instruction
*defi
= cst
->getSrc(s
)->defs
.front()->getInsn();
1979 // catch some cases where don't really need MOVs
1980 if (cst
->getSrc(s
)->refCount() == 1 && !defi
->constrainedDefs())
1983 LValue
*lval
= new_LValue(func
, cst
->src(s
).getFile());
1984 lval
->reg
.size
= size
;
1986 mov
= new_Instruction(func
, OP_MOV
, typeOfSize(size
));
1987 mov
->setDef(0, lval
);
1988 mov
->setSrc(0, cst
->getSrc(s
));
1989 cst
->setSrc(s
, mov
->getDef(0));
1990 cst
->bb
->insertBefore(cst
, mov
);
1992 cst
->getDef(0)->asLValue()->noSpill
= 1; // doesn't help
1994 if (cst
->op
== OP_UNION
)
1995 mov
->setPredicate(defi
->cc
, defi
->getPredicate());
2003 } // namespace nv50_ir