2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
29 #if __cplusplus >= 201103L
30 #include <unordered_map>
32 #include <tr1/unordered_map>
37 #if __cplusplus >= 201103L
39 using std::unordered_map
;
42 using std::tr1::unordered_map
;
45 #define MAX_REGISTER_FILE_SIZE 256
50 RegisterSet(const Target
*);
52 void init(const Target
*);
53 void reset(DataFile
, bool resetMax
= false);
55 void periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
);
56 void intersect(DataFile f
, const RegisterSet
*);
58 bool assign(int32_t& reg
, DataFile f
, unsigned int size
, unsigned int maxReg
);
59 void release(DataFile f
, int32_t reg
, unsigned int size
);
60 void occupy(DataFile f
, int32_t reg
, unsigned int size
);
61 void occupy(const Value
*);
62 void occupyMask(DataFile f
, int32_t reg
, uint8_t mask
);
63 bool isOccupied(DataFile f
, int32_t reg
, unsigned int size
) const;
64 bool testOccupy(const Value
*);
65 bool testOccupy(DataFile f
, int32_t reg
, unsigned int size
);
67 inline int getMaxAssigned(DataFile f
) const { return fill
[f
]; }
69 inline unsigned int getFileSize(DataFile f
) const
74 inline unsigned int units(DataFile f
, unsigned int size
) const
76 return size
>> unit
[f
];
78 // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary)
79 inline unsigned int idToBytes(const Value
*v
) const
81 return v
->reg
.data
.id
* MIN2(v
->reg
.size
, 4);
83 inline unsigned int idToUnits(const Value
*v
) const
85 return units(v
->reg
.file
, idToBytes(v
));
87 inline int bytesToId(Value
*v
, unsigned int bytes
) const
90 return units(v
->reg
.file
, bytes
);
93 inline int unitsToId(DataFile f
, int u
, uint8_t size
) const
97 return (size
< 4) ? u
: ((u
<< unit
[f
]) / 4);
100 void print(DataFile f
) const;
102 const bool restrictedGPR16Range
;
105 BitSet bits
[LAST_REGISTER_FILE
+ 1];
107 int unit
[LAST_REGISTER_FILE
+ 1]; // log2 of allocation granularity
109 int last
[LAST_REGISTER_FILE
+ 1];
110 int fill
[LAST_REGISTER_FILE
+ 1];
114 RegisterSet::reset(DataFile f
, bool resetMax
)
122 RegisterSet::init(const Target
*targ
)
124 for (unsigned int rf
= 0; rf
<= FILE_ADDRESS
; ++rf
) {
125 DataFile f
= static_cast<DataFile
>(rf
);
126 last
[rf
] = targ
->getFileSize(f
) - 1;
127 unit
[rf
] = targ
->getFileUnit(f
);
129 assert(last
[rf
] < MAX_REGISTER_FILE_SIZE
);
130 bits
[rf
].allocate(last
[rf
] + 1, true);
134 RegisterSet::RegisterSet(const Target
*targ
)
135 : restrictedGPR16Range(targ
->getChipset() < 0xc0)
138 for (unsigned int i
= 0; i
<= LAST_REGISTER_FILE
; ++i
)
139 reset(static_cast<DataFile
>(i
));
143 RegisterSet::periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
)
145 bits
[f
].periodicMask32(lock
, unlock
);
149 RegisterSet::intersect(DataFile f
, const RegisterSet
*set
)
151 bits
[f
] |= set
->bits
[f
];
155 RegisterSet::print(DataFile f
) const
163 RegisterSet::assign(int32_t& reg
, DataFile f
, unsigned int size
, unsigned int maxReg
)
165 reg
= bits
[f
].findFreeRange(size
, maxReg
);
168 fill
[f
] = MAX2(fill
[f
], (int32_t)(reg
+ size
- 1));
173 RegisterSet::isOccupied(DataFile f
, int32_t reg
, unsigned int size
) const
175 return bits
[f
].testRange(reg
, size
);
179 RegisterSet::occupy(const Value
*v
)
181 occupy(v
->reg
.file
, idToUnits(v
), v
->reg
.size
>> unit
[v
->reg
.file
]);
185 RegisterSet::occupyMask(DataFile f
, int32_t reg
, uint8_t mask
)
187 bits
[f
].setMask(reg
& ~31, static_cast<uint32_t>(mask
) << (reg
% 32));
191 RegisterSet::occupy(DataFile f
, int32_t reg
, unsigned int size
)
193 bits
[f
].setRange(reg
, size
);
195 INFO_DBG(0, REG_ALLOC
, "reg occupy: %u[%i] %u\n", f
, reg
, size
);
197 fill
[f
] = MAX2(fill
[f
], (int32_t)(reg
+ size
- 1));
201 RegisterSet::testOccupy(const Value
*v
)
203 return testOccupy(v
->reg
.file
,
204 idToUnits(v
), v
->reg
.size
>> unit
[v
->reg
.file
]);
208 RegisterSet::testOccupy(DataFile f
, int32_t reg
, unsigned int size
)
210 if (isOccupied(f
, reg
, size
))
212 occupy(f
, reg
, size
);
217 RegisterSet::release(DataFile f
, int32_t reg
, unsigned int size
)
219 bits
[f
].clrRange(reg
, size
);
221 INFO_DBG(0, REG_ALLOC
, "reg release: %u[%i] %u\n", f
, reg
, size
);
227 RegAlloc(Program
*program
) : prog(program
), sequence(0) { }
233 class PhiMovesPass
: public Pass
{
235 virtual bool visit(BasicBlock
*);
236 inline bool needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
);
237 inline void splitEdges(BasicBlock
*b
);
240 class ArgumentMovesPass
: public Pass
{
242 virtual bool visit(BasicBlock
*);
245 class BuildIntervalsPass
: public Pass
{
247 virtual bool visit(BasicBlock
*);
248 void collectLiveValues(BasicBlock
*);
249 void addLiveRange(Value
*, const BasicBlock
*, int end
);
252 class InsertConstraintsPass
: public Pass
{
254 bool exec(Function
*func
);
256 virtual bool visit(BasicBlock
*);
258 void insertConstraintMove(Instruction
*, int s
);
259 bool insertConstraintMoves();
261 void condenseDefs(Instruction
*);
262 void condenseDefs(Instruction
*, const int first
, const int last
);
263 void condenseSrcs(Instruction
*, const int first
, const int last
);
265 void addHazard(Instruction
*i
, const ValueRef
*src
);
266 void textureMask(TexInstruction
*);
267 void addConstraint(Instruction
*, int s
, int n
);
268 bool detectConflict(Instruction
*, int s
);
270 // target specific functions, TODO: put in subclass or Target
271 void texConstraintNV50(TexInstruction
*);
272 void texConstraintNVC0(TexInstruction
*);
273 void texConstraintNVE0(TexInstruction
*);
274 void texConstraintGM107(TexInstruction
*);
276 bool isScalarTexGM107(TexInstruction
*);
277 void handleScalarTexGM107(TexInstruction
*);
279 std::list
<Instruction
*> constrList
;
284 bool buildLiveSets(BasicBlock
*);
290 // instructions in control flow / chronological order
293 int sequence
; // for manual passes through CFG
296 typedef std::pair
<Value
*, Value
*> ValuePair
;
298 class SpillCodeInserter
301 SpillCodeInserter(Function
*fn
) : func(fn
), stackSize(0), stackBase(0) { }
303 bool run(const std::list
<ValuePair
>&);
305 Symbol
*assignSlot(const Interval
&, const unsigned int size
);
306 Value
*offsetSlot(Value
*, const LValue
*);
307 inline int32_t getStackSize() const { return stackSize
; }
315 std::list
<Value
*> residents
; // needed to recalculate occup
318 inline uint8_t size() const { return sym
->reg
.size
; }
320 std::list
<SpillSlot
> slots
;
324 LValue
*unspill(Instruction
*usei
, LValue
*, Value
*slot
);
325 void spill(Instruction
*defi
, Value
*slot
, LValue
*);
329 RegAlloc::BuildIntervalsPass::addLiveRange(Value
*val
,
330 const BasicBlock
*bb
,
333 Instruction
*insn
= val
->getUniqueInsn();
336 insn
= bb
->getFirst();
338 assert(bb
->getFirst()->serial
<= bb
->getExit()->serial
);
339 assert(bb
->getExit()->serial
+ 1 >= end
);
341 int begin
= insn
->serial
;
342 if (begin
< bb
->getEntry()->serial
|| begin
> bb
->getExit()->serial
)
343 begin
= bb
->getEntry()->serial
;
345 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "%%%i <- live range [%i(%i), %i)\n",
346 val
->id
, begin
, insn
->serial
, end
);
348 if (begin
!= end
) // empty ranges are only added as hazards for fixed regs
349 val
->livei
.extend(begin
, end
);
353 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
)
355 if (b
->cfg
.incidentCount() <= 1)
359 for (Graph::EdgeIterator ei
= p
->cfg
.outgoing(); !ei
.end(); ei
.next())
360 if (ei
.getType() == Graph::Edge::TREE
||
361 ei
.getType() == Graph::Edge::FORWARD
)
367 size_t operator()(const std::pair
<Instruction
*, BasicBlock
*>& val
) const {
368 return hash
<Instruction
*>()(val
.first
) * 31 +
369 hash
<BasicBlock
*>()(val
.second
);
373 typedef unordered_map
<
374 std::pair
<Instruction
*, BasicBlock
*>, Value
*, PhiMapHash
> PhiMap
;
376 // Critical edges need to be split up so that work can be inserted along
377 // specific edge transitions. Unfortunately manipulating incident edges into a
378 // BB invalidates all the PHI nodes since their sources are implicitly ordered
379 // by incident edge order.
381 // TODO: Make it so that that is not the case, and PHI nodes store pointers to
384 RegAlloc::PhiMovesPass::splitEdges(BasicBlock
*bb
)
388 Graph::EdgeIterator ei
;
389 std::stack
<BasicBlock
*> stack
;
392 for (ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
393 pb
= BasicBlock::get(ei
.getNode());
395 if (needNewElseBlock(bb
, pb
))
399 // No critical edges were found, no need to perform any work.
403 // We're about to, potentially, reorder the inbound edges. This means that
404 // we need to hold on to the (phi, bb) -> src mapping, and fix up the phi
405 // nodes after the graph has been modified.
409 for (ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next(), j
++) {
410 pb
= BasicBlock::get(ei
.getNode());
411 for (phi
= bb
->getPhi(); phi
&& phi
->op
== OP_PHI
; phi
= phi
->next
)
412 phis
.insert(std::make_pair(std::make_pair(phi
, pb
), phi
->getSrc(j
)));
415 while (!stack
.empty()) {
417 pn
= new BasicBlock(func
);
420 pb
->cfg
.detach(&bb
->cfg
);
421 pb
->cfg
.attach(&pn
->cfg
, Graph::Edge::TREE
);
422 pn
->cfg
.attach(&bb
->cfg
, Graph::Edge::FORWARD
);
424 assert(pb
->getExit()->op
!= OP_CALL
);
425 if (pb
->getExit()->asFlow()->target
.bb
== bb
)
426 pb
->getExit()->asFlow()->target
.bb
= pn
;
428 for (phi
= bb
->getPhi(); phi
&& phi
->op
== OP_PHI
; phi
= phi
->next
) {
429 PhiMap::iterator it
= phis
.find(std::make_pair(phi
, pb
));
430 assert(it
!= phis
.end());
431 phis
.insert(std::make_pair(std::make_pair(phi
, pn
), it
->second
));
436 // Now go through and fix up all of the phi node sources.
438 for (ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next(), j
++) {
439 pb
= BasicBlock::get(ei
.getNode());
440 for (phi
= bb
->getPhi(); phi
&& phi
->op
== OP_PHI
; phi
= phi
->next
) {
441 PhiMap::const_iterator it
= phis
.find(std::make_pair(phi
, pb
));
442 assert(it
!= phis
.end());
444 phi
->setSrc(j
, it
->second
);
449 // For each operand of each PHI in b, generate a new value by inserting a MOV
450 // at the end of the block it is coming from and replace the operand with its
451 // result. This eliminates liveness conflicts and enables us to let values be
452 // copied to the right register if such a conflict exists nonetheless.
454 // These MOVs are also crucial in making sure the live intervals of phi srces
455 // are extended until the end of the loop, since they are not included in the
458 RegAlloc::PhiMovesPass::visit(BasicBlock
*bb
)
460 Instruction
*phi
, *mov
;
464 // insert MOVs (phi->src(j) should stem from j-th in-BB)
466 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
467 BasicBlock
*pb
= BasicBlock::get(ei
.getNode());
468 if (!pb
->isTerminated())
469 pb
->insertTail(new_FlowInstruction(func
, OP_BRA
, bb
));
471 for (phi
= bb
->getPhi(); phi
&& phi
->op
== OP_PHI
; phi
= phi
->next
) {
472 LValue
*tmp
= new_LValue(func
, phi
->getDef(0)->asLValue());
473 mov
= new_Instruction(func
, OP_MOV
, typeOfSize(tmp
->reg
.size
));
475 mov
->setSrc(0, phi
->getSrc(j
));
479 pb
->insertBefore(pb
->getExit(), mov
);
488 RegAlloc::ArgumentMovesPass::visit(BasicBlock
*bb
)
490 // Bind function call inputs/outputs to the same physical register
491 // the callee uses, inserting moves as appropriate for the case a
493 for (Instruction
*i
= bb
->getEntry(); i
; i
= i
->next
) {
494 FlowInstruction
*cal
= i
->asFlow();
495 // TODO: Handle indirect calls.
496 // Right now they should only be generated for builtins.
497 if (!cal
|| cal
->op
!= OP_CALL
|| cal
->builtin
|| cal
->indirect
)
499 RegisterSet
clobberSet(prog
->getTarget());
501 // Bind input values.
502 for (int s
= cal
->indirect
? 1 : 0; cal
->srcExists(s
); ++s
) {
503 const int t
= cal
->indirect
? (s
- 1) : s
;
504 LValue
*tmp
= new_LValue(func
, cal
->getSrc(s
)->asLValue());
505 tmp
->reg
.data
.id
= cal
->target
.fn
->ins
[t
].rep()->reg
.data
.id
;
508 new_Instruction(func
, OP_MOV
, typeOfSize(tmp
->reg
.size
));
510 mov
->setSrc(0, cal
->getSrc(s
));
513 bb
->insertBefore(cal
, mov
);
516 // Bind output values.
517 for (int d
= 0; cal
->defExists(d
); ++d
) {
518 LValue
*tmp
= new_LValue(func
, cal
->getDef(d
)->asLValue());
519 tmp
->reg
.data
.id
= cal
->target
.fn
->outs
[d
].rep()->reg
.data
.id
;
522 new_Instruction(func
, OP_MOV
, typeOfSize(tmp
->reg
.size
));
524 mov
->setDef(0, cal
->getDef(d
));
527 bb
->insertAfter(cal
, mov
);
528 clobberSet
.occupy(tmp
);
531 // Bind clobbered values.
532 for (std::deque
<Value
*>::iterator it
= cal
->target
.fn
->clobbers
.begin();
533 it
!= cal
->target
.fn
->clobbers
.end();
535 if (clobberSet
.testOccupy(*it
)) {
536 Value
*tmp
= new_LValue(func
, (*it
)->asLValue());
537 tmp
->reg
.data
.id
= (*it
)->reg
.data
.id
;
538 cal
->setDef(cal
->defCount(), tmp
);
543 // Update the clobber set of the function.
544 if (BasicBlock::get(func
->cfgExit
) == bb
) {
545 func
->buildDefSets();
546 for (unsigned int i
= 0; i
< bb
->defSet
.getSize(); ++i
)
547 if (bb
->defSet
.test(i
))
548 func
->clobbers
.push_back(func
->getLValue(i
));
554 // Build the set of live-in variables of bb.
556 RegAlloc::buildLiveSets(BasicBlock
*bb
)
558 Function
*f
= bb
->getFunction();
563 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "buildLiveSets(BB:%i)\n", bb
->getId());
565 bb
->liveSet
.allocate(func
->allLValues
.getSize(), false);
568 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
569 bn
= BasicBlock::get(ei
.getNode());
572 if (bn
->cfg
.visit(sequence
))
573 if (!buildLiveSets(bn
))
575 if (n
++ || bb
->liveSet
.marker
)
576 bb
->liveSet
|= bn
->liveSet
;
578 bb
->liveSet
= bn
->liveSet
;
580 if (!n
&& !bb
->liveSet
.marker
)
582 bb
->liveSet
.marker
= true;
584 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
585 INFO("BB:%i live set of out blocks:\n", bb
->getId());
589 // if (!bb->getEntry())
592 if (bb
== BasicBlock::get(f
->cfgExit
)) {
593 for (std::deque
<ValueRef
>::iterator it
= f
->outs
.begin();
594 it
!= f
->outs
.end(); ++it
) {
595 assert(it
->get()->asLValue());
596 bb
->liveSet
.set(it
->get()->id
);
600 for (i
= bb
->getExit(); i
&& i
!= bb
->getEntry()->prev
; i
= i
->prev
) {
601 for (d
= 0; i
->defExists(d
); ++d
)
602 bb
->liveSet
.clr(i
->getDef(d
)->id
);
603 for (s
= 0; i
->srcExists(s
); ++s
)
604 if (i
->getSrc(s
)->asLValue())
605 bb
->liveSet
.set(i
->getSrc(s
)->id
);
607 for (i
= bb
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
)
608 bb
->liveSet
.clr(i
->getDef(0)->id
);
610 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
611 INFO("BB:%i live set after propagation:\n", bb
->getId());
619 RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock
*bb
)
621 BasicBlock
*bbA
= NULL
, *bbB
= NULL
;
623 if (bb
->cfg
.outgoingCount()) {
624 // trickery to save a loop of OR'ing liveSets
625 // aliasing works fine with BitSet::setOr
626 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
628 bb
->liveSet
.setOr(&bbA
->liveSet
, &bbB
->liveSet
);
633 bbB
= BasicBlock::get(ei
.getNode());
635 bb
->liveSet
.setOr(&bbB
->liveSet
, bbA
? &bbA
->liveSet
: NULL
);
637 if (bb
->cfg
.incidentCount()) {
643 RegAlloc::BuildIntervalsPass::visit(BasicBlock
*bb
)
645 collectLiveValues(bb
);
647 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "BuildIntervals(BB:%i)\n", bb
->getId());
649 // go through out blocks and delete phi sources that do not originate from
650 // the current block from the live set
651 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
652 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
654 for (Instruction
*i
= out
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
) {
655 bb
->liveSet
.clr(i
->getDef(0)->id
);
657 for (int s
= 0; i
->srcExists(s
); ++s
) {
658 assert(i
->src(s
).getInsn());
659 if (i
->getSrc(s
)->getUniqueInsn()->bb
== bb
) // XXX: reachableBy ?
660 bb
->liveSet
.set(i
->getSrc(s
)->id
);
662 bb
->liveSet
.clr(i
->getSrc(s
)->id
);
667 // remaining live-outs are live until end
669 for (unsigned int j
= 0; j
< bb
->liveSet
.getSize(); ++j
)
670 if (bb
->liveSet
.test(j
))
671 addLiveRange(func
->getLValue(j
), bb
, bb
->getExit()->serial
+ 1);
674 for (Instruction
*i
= bb
->getExit(); i
&& i
->op
!= OP_PHI
; i
= i
->prev
) {
675 for (int d
= 0; i
->defExists(d
); ++d
) {
676 bb
->liveSet
.clr(i
->getDef(d
)->id
);
677 if (i
->getDef(d
)->reg
.data
.id
>= 0) // add hazard for fixed regs
678 i
->getDef(d
)->livei
.extend(i
->serial
, i
->serial
);
681 for (int s
= 0; i
->srcExists(s
); ++s
) {
682 if (!i
->getSrc(s
)->asLValue())
684 if (!bb
->liveSet
.test(i
->getSrc(s
)->id
)) {
685 bb
->liveSet
.set(i
->getSrc(s
)->id
);
686 addLiveRange(i
->getSrc(s
), bb
, i
->serial
);
691 if (bb
== BasicBlock::get(func
->cfg
.getRoot())) {
692 for (std::deque
<ValueDef
>::iterator it
= func
->ins
.begin();
693 it
!= func
->ins
.end(); ++it
) {
694 if (it
->get()->reg
.data
.id
>= 0) // add hazard for fixed regs
695 it
->get()->livei
.extend(0, 1);
703 #define JOIN_MASK_PHI (1 << 0)
704 #define JOIN_MASK_UNION (1 << 1)
705 #define JOIN_MASK_MOV (1 << 2)
706 #define JOIN_MASK_TEX (1 << 3)
711 GCRA(Function
*, SpillCodeInserter
&);
714 bool allocateRegisters(ArrayList
& insns
);
716 void printNodeInfo() const;
719 class RIG_Node
: public Graph::Node
724 void init(const RegisterSet
&, LValue
*);
726 void addInterference(RIG_Node
*);
727 void addRegPreference(RIG_Node
*);
729 inline LValue
*getValue() const
731 return reinterpret_cast<LValue
*>(data
);
733 inline void setValue(LValue
*lval
) { data
= lval
; }
735 inline uint8_t getCompMask() const
737 return ((1 << colors
) - 1) << (reg
& 7);
740 static inline RIG_Node
*get(const Graph::EdgeIterator
& ei
)
742 return static_cast<RIG_Node
*>(ei
.getNode());
747 uint16_t degreeLimit
; // if deg < degLimit, node is trivially colourable
756 // list pointers for simplify() phase
760 // union of the live intervals of all coalesced values (we want to retain
761 // the separate intervals for testing interference of compound values)
764 std::list
<RIG_Node
*> prefRegs
;
768 inline RIG_Node
*getNode(const LValue
*v
) const { return &nodes
[v
->id
]; }
770 void buildRIG(ArrayList
&);
771 bool coalesce(ArrayList
&);
772 bool doCoalesce(ArrayList
&, unsigned int mask
);
773 void calculateSpillWeights();
775 bool selectRegisters();
776 void cleanup(const bool success
);
778 void simplifyEdge(RIG_Node
*, RIG_Node
*);
779 void simplifyNode(RIG_Node
*);
781 bool coalesceValues(Value
*, Value
*, bool force
);
782 void resolveSplitsAndMerges();
783 void makeCompound(Instruction
*, bool isSplit
);
785 inline void checkInterference(const RIG_Node
*, Graph::EdgeIterator
&);
787 inline void insertOrderedTail(std::list
<RIG_Node
*>&, RIG_Node
*);
788 void checkList(std::list
<RIG_Node
*>&);
791 std::stack
<uint32_t> stack
;
793 // list headers for simplify() phase
799 unsigned int nodeCount
;
805 uint8_t data
[17][17];
808 for (int i
= 1; i
<= 16; ++i
)
809 for (int j
= 1; j
<= 16; ++j
)
810 data
[i
][j
] = j
* ((i
+ j
- 1) / j
);
813 const uint8_t* operator[](std::size_t i
) const {
818 static const RelDegree relDegree
;
822 // need to fixup register id for participants of OP_MERGE/SPLIT
823 std::list
<Instruction
*> merges
;
824 std::list
<Instruction
*> splits
;
826 SpillCodeInserter
& spill
;
827 std::list
<ValuePair
> mustSpill
;
830 const GCRA::RelDegree
GCRA::relDegree
;
832 GCRA::RIG_Node::RIG_Node() : Node(NULL
), next(this), prev(this)
838 GCRA::printNodeInfo() const
840 for (unsigned int i
= 0; i
< nodeCount
; ++i
) {
841 if (!nodes
[i
].colors
)
843 INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X",
845 nodes
[i
].f
,nodes
[i
].reg
,nodes
[i
].colors
,
847 nodes
[i
].degree
, nodes
[i
].degreeLimit
);
849 for (Graph::EdgeIterator ei
= nodes
[i
].outgoing(); !ei
.end(); ei
.next())
850 INFO(" %%%i", RIG_Node::get(ei
)->getValue()->id
);
851 for (Graph::EdgeIterator ei
= nodes
[i
].incident(); !ei
.end(); ei
.next())
852 INFO(" %%%i", RIG_Node::get(ei
)->getValue()->id
);
858 isShortRegOp(Instruction
*insn
)
860 // Immediates are always in src1 (except zeroes, which end up getting
861 // replaced with a zero reg). Every other situation can be resolved by
862 // using a long encoding.
863 return insn
->srcExists(1) && insn
->src(1).getFile() == FILE_IMMEDIATE
&&
864 insn
->getSrc(1)->reg
.data
.u64
;
867 // Check if this LValue is ever used in an instruction that can't be encoded
868 // with long registers (i.e. > r63)
870 isShortRegVal(LValue
*lval
)
872 if (lval
->getInsn() == NULL
)
874 for (Value::DefCIterator def
= lval
->defs
.begin();
875 def
!= lval
->defs
.end(); ++def
)
876 if (isShortRegOp((*def
)->getInsn()))
878 for (Value::UseCIterator use
= lval
->uses
.begin();
879 use
!= lval
->uses
.end(); ++use
)
880 if (isShortRegOp((*use
)->getInsn()))
886 GCRA::RIG_Node::init(const RegisterSet
& regs
, LValue
*lval
)
889 if (lval
->reg
.data
.id
>= 0)
890 lval
->noSpill
= lval
->fixedReg
= 1;
892 colors
= regs
.units(lval
->reg
.file
, lval
->reg
.size
);
895 if (lval
->reg
.data
.id
>= 0)
896 reg
= regs
.idToUnits(lval
);
898 weight
= std::numeric_limits
<float>::infinity();
900 maxReg
= regs
.getFileSize(f
);
901 // On nv50, we lose a bit of gpr encoding when there's an embedded
903 if (regs
.restrictedGPR16Range
&& f
== FILE_GPR
&& (lval
->reg
.size
== 2 || isShortRegVal(lval
)))
905 degreeLimit
= maxReg
;
906 degreeLimit
-= relDegree
[1][colors
] - 1;
908 livei
.insert(lval
->livei
);
912 GCRA::coalesceValues(Value
*dst
, Value
*src
, bool force
)
914 LValue
*rep
= dst
->join
->asLValue();
915 LValue
*val
= src
->join
->asLValue();
917 if (!force
&& val
->reg
.data
.id
>= 0) {
918 rep
= src
->join
->asLValue();
919 val
= dst
->join
->asLValue();
921 RIG_Node
*nRep
= &nodes
[rep
->id
];
922 RIG_Node
*nVal
= &nodes
[val
->id
];
924 if (src
->reg
.file
!= dst
->reg
.file
) {
927 WARN("forced coalescing of values in different files !\n");
929 if (!force
&& dst
->reg
.size
!= src
->reg
.size
)
932 if ((rep
->reg
.data
.id
>= 0) && (rep
->reg
.data
.id
!= val
->reg
.data
.id
)) {
934 if (val
->reg
.data
.id
>= 0)
935 WARN("forced coalescing of values in different fixed regs !\n");
937 if (val
->reg
.data
.id
>= 0)
939 // make sure that there is no overlap with the fixed register of rep
940 for (ArrayList::Iterator it
= func
->allLValues
.iterator();
941 !it
.end(); it
.next()) {
942 Value
*reg
= reinterpret_cast<Value
*>(it
.get())->asLValue();
944 if (reg
->interfers(rep
) && reg
->livei
.overlaps(nVal
->livei
))
950 if (!force
&& nRep
->livei
.overlaps(nVal
->livei
))
953 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "joining %%%i($%i) <- %%%i\n",
954 rep
->id
, rep
->reg
.data
.id
, val
->id
);
956 // set join pointer of all values joined with val
957 for (Value::DefIterator def
= val
->defs
.begin(); def
!= val
->defs
.end();
959 (*def
)->get()->join
= rep
;
960 assert(rep
->join
== rep
&& val
->join
== rep
);
962 // add val's definitions to rep and extend the live interval of its RIG node
963 rep
->defs
.insert(rep
->defs
.end(), val
->defs
.begin(), val
->defs
.end());
964 nRep
->livei
.unify(nVal
->livei
);
965 nRep
->degreeLimit
= MIN2(nRep
->degreeLimit
, nVal
->degreeLimit
);
966 nRep
->maxReg
= MIN2(nRep
->maxReg
, nVal
->maxReg
);
971 GCRA::coalesce(ArrayList
& insns
)
973 bool ret
= doCoalesce(insns
, JOIN_MASK_PHI
);
976 switch (func
->getProgram()->getTarget()->getChipset() & ~0xf) {
981 ret
= doCoalesce(insns
, JOIN_MASK_UNION
| JOIN_MASK_TEX
);
991 ret
= doCoalesce(insns
, JOIN_MASK_UNION
);
998 return doCoalesce(insns
, JOIN_MASK_MOV
);
1001 static inline uint8_t makeCompMask(int compSize
, int base
, int size
)
1003 uint8_t m
= ((1 << size
) - 1) << base
;
1010 return (m
<< 4) | m
;
1013 return (m
<< 4) | m
;
1015 assert(compSize
<= 8);
1020 // Used when coalescing moves. The non-compound value will become one, e.g.:
1021 // mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 }
1022 // split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
1023 static inline void copyCompound(Value
*dst
, Value
*src
)
1025 LValue
*ldst
= dst
->asLValue();
1026 LValue
*lsrc
= src
->asLValue();
1028 if (ldst
->compound
&& !lsrc
->compound
) {
1029 LValue
*swap
= lsrc
;
1034 ldst
->compound
= lsrc
->compound
;
1035 ldst
->compMask
= lsrc
->compMask
;
1039 GCRA::makeCompound(Instruction
*insn
, bool split
)
1041 LValue
*rep
= (split
? insn
->getSrc(0) : insn
->getDef(0))->asLValue();
1043 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
1044 INFO("makeCompound(split = %i): ", split
);
1048 const unsigned int size
= getNode(rep
)->colors
;
1049 unsigned int base
= 0;
1052 rep
->compMask
= 0xff;
1055 for (int c
= 0; split
? insn
->defExists(c
) : insn
->srcExists(c
); ++c
) {
1056 LValue
*val
= (split
? insn
->getDef(c
) : insn
->getSrc(c
))->asLValue();
1060 val
->compMask
= 0xff;
1061 val
->compMask
&= makeCompMask(size
, base
, getNode(val
)->colors
);
1062 assert(val
->compMask
);
1064 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "compound: %%%i:%02x <- %%%i:%02x\n",
1065 rep
->id
, rep
->compMask
, val
->id
, val
->compMask
);
1067 base
+= getNode(val
)->colors
;
1069 assert(base
== size
);
1073 GCRA::doCoalesce(ArrayList
& insns
, unsigned int mask
)
1077 for (n
= 0; n
< insns
.getSize(); ++n
) {
1079 Instruction
*insn
= reinterpret_cast<Instruction
*>(insns
.get(n
));
1083 if (!(mask
& JOIN_MASK_PHI
))
1085 for (c
= 0; insn
->srcExists(c
); ++c
)
1086 if (!coalesceValues(insn
->getDef(0), insn
->getSrc(c
), false)) {
1088 ERROR("failed to coalesce phi operands\n");
1094 if (!(mask
& JOIN_MASK_UNION
))
1096 for (c
= 0; insn
->srcExists(c
); ++c
)
1097 coalesceValues(insn
->getDef(0), insn
->getSrc(c
), true);
1098 if (insn
->op
== OP_MERGE
) {
1099 merges
.push_back(insn
);
1100 if (insn
->srcExists(1))
1101 makeCompound(insn
, false);
1105 if (!(mask
& JOIN_MASK_UNION
))
1107 splits
.push_back(insn
);
1108 for (c
= 0; insn
->defExists(c
); ++c
)
1109 coalesceValues(insn
->getSrc(0), insn
->getDef(c
), true);
1110 makeCompound(insn
, true);
1113 if (!(mask
& JOIN_MASK_MOV
))
1116 if (!insn
->getDef(0)->uses
.empty())
1117 i
= (*insn
->getDef(0)->uses
.begin())->getInsn();
// if this is a constraint-move there will only be a single use
1119 if (i
&& i
->op
== OP_MERGE
) // do we really still need this ?
1121 i
= insn
->getSrc(0)->getUniqueInsn();
1122 if (i
&& !i
->constrainedDefs()) {
1123 if (coalesceValues(insn
->getDef(0), insn
->getSrc(0), false))
1124 copyCompound(insn
->getSrc(0), insn
->getDef(0));
1137 if (!(mask
& JOIN_MASK_TEX
))
1139 for (c
= 0; insn
->srcExists(c
) && c
!= insn
->predSrc
; ++c
)
1140 coalesceValues(insn
->getDef(c
), insn
->getSrc(c
), true);
1150 GCRA::RIG_Node::addInterference(RIG_Node
*node
)
1152 this->degree
+= relDegree
[node
->colors
][colors
];
1153 node
->degree
+= relDegree
[colors
][node
->colors
];
1155 this->attach(node
, Graph::Edge::CROSS
);
1159 GCRA::RIG_Node::addRegPreference(RIG_Node
*node
)
1161 prefRegs
.push_back(node
);
1164 GCRA::GCRA(Function
*fn
, SpillCodeInserter
& spill
) :
1166 regs(fn
->getProgram()->getTarget()),
1169 prog
= func
->getProgram();
1179 GCRA::checkList(std::list
<RIG_Node
*>& lst
)
1181 GCRA::RIG_Node
*prev
= NULL
;
1183 for (std::list
<RIG_Node
*>::iterator it
= lst
.begin();
1186 assert((*it
)->getValue()->join
== (*it
)->getValue());
1188 assert(prev
->livei
.begin() <= (*it
)->livei
.begin());
1194 GCRA::insertOrderedTail(std::list
<RIG_Node
*>& list
, RIG_Node
*node
)
1196 if (node
->livei
.isEmpty())
1198 // only the intervals of joined values don't necessarily arrive in order
1199 std::list
<RIG_Node
*>::iterator prev
, it
;
1200 for (it
= list
.end(); it
!= list
.begin(); it
= prev
) {
1203 if ((*prev
)->livei
.begin() <= node
->livei
.begin())
1206 list
.insert(it
, node
);
1210 GCRA::buildRIG(ArrayList
& insns
)
1212 std::list
<RIG_Node
*> values
, active
;
1214 for (std::deque
<ValueDef
>::iterator it
= func
->ins
.begin();
1215 it
!= func
->ins
.end(); ++it
)
1216 insertOrderedTail(values
, getNode(it
->get()->asLValue()));
1218 for (int i
= 0; i
< insns
.getSize(); ++i
) {
1219 Instruction
*insn
= reinterpret_cast<Instruction
*>(insns
.get(i
));
1220 for (int d
= 0; insn
->defExists(d
); ++d
)
1221 if (insn
->getDef(d
)->rep() == insn
->getDef(d
))
1222 insertOrderedTail(values
, getNode(insn
->getDef(d
)->asLValue()));
1226 while (!values
.empty()) {
1227 RIG_Node
*cur
= values
.front();
1229 for (std::list
<RIG_Node
*>::iterator it
= active
.begin();
1230 it
!= active
.end();) {
1231 RIG_Node
*node
= *it
;
1233 if (node
->livei
.end() <= cur
->livei
.begin()) {
1234 it
= active
.erase(it
);
1236 if (node
->f
== cur
->f
&& node
->livei
.overlaps(cur
->livei
))
1237 cur
->addInterference(node
);
1242 active
.push_back(cur
);
1247 GCRA::calculateSpillWeights()
1249 for (unsigned int i
= 0; i
< nodeCount
; ++i
) {
1250 RIG_Node
*const n
= &nodes
[i
];
1251 if (!nodes
[i
].colors
|| nodes
[i
].livei
.isEmpty())
1253 if (nodes
[i
].reg
>= 0) {
1255 regs
.occupy(n
->f
, n
->reg
, n
->colors
);
1258 LValue
*val
= nodes
[i
].getValue();
1260 if (!val
->noSpill
) {
1262 for (Value::DefIterator it
= val
->defs
.begin();
1263 it
!= val
->defs
.end();
1265 rc
+= (*it
)->get()->refCount();
1268 (float)rc
* (float)rc
/ (float)nodes
[i
].livei
.extent();
1271 if (nodes
[i
].degree
< nodes
[i
].degreeLimit
) {
1273 if (val
->reg
.size
> 4)
1275 DLLIST_ADDHEAD(&lo
[l
], &nodes
[i
]);
1277 DLLIST_ADDHEAD(&hi
, &nodes
[i
]);
1280 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
)
1285 GCRA::simplifyEdge(RIG_Node
*a
, RIG_Node
*b
)
1287 bool move
= b
->degree
>= b
->degreeLimit
;
1289 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
,
1290 "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n",
1291 a
->getValue()->id
, a
->degree
, a
->degreeLimit
,
1292 b
->getValue()->id
, b
->degree
, b
->degreeLimit
);
1294 b
->degree
-= relDegree
[a
->colors
][b
->colors
];
1296 move
= move
&& b
->degree
< b
->degreeLimit
;
1297 if (move
&& !DLLIST_EMPTY(b
)) {
1298 int l
= (b
->getValue()->reg
.size
> 4) ? 1 : 0;
1300 DLLIST_ADDTAIL(&lo
[l
], b
);
1305 GCRA::simplifyNode(RIG_Node
*node
)
1307 for (Graph::EdgeIterator ei
= node
->outgoing(); !ei
.end(); ei
.next())
1308 simplifyEdge(node
, RIG_Node::get(ei
));
1310 for (Graph::EdgeIterator ei
= node
->incident(); !ei
.end(); ei
.next())
1311 simplifyEdge(node
, RIG_Node::get(ei
));
1314 stack
.push(node
->getValue()->id
);
1316 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "SIMPLIFY: pushed %%%i%s\n",
1317 node
->getValue()->id
,
1318 (node
->degree
< node
->degreeLimit
) ? "" : "(spill)");
1325 if (!DLLIST_EMPTY(&lo
[0])) {
1327 simplifyNode(lo
[0].next
);
1328 } while (!DLLIST_EMPTY(&lo
[0]));
1330 if (!DLLIST_EMPTY(&lo
[1])) {
1331 simplifyNode(lo
[1].next
);
1333 if (!DLLIST_EMPTY(&hi
)) {
1334 RIG_Node
*best
= hi
.next
;
1335 unsigned bestMaxReg
= best
->maxReg
;
1336 float bestScore
= best
->weight
/ (float)best
->degree
;
1337 // Spill candidate. First go through the ones with the highest max
1338 // register, then the ones with lower. That way the ones with the
1339 // lowest requirement will be allocated first, since it's a stack.
1340 for (RIG_Node
*it
= best
->next
; it
!= &hi
; it
= it
->next
) {
1341 float score
= it
->weight
/ (float)it
->degree
;
1342 if (score
< bestScore
|| it
->maxReg
> bestMaxReg
) {
1345 bestMaxReg
= it
->maxReg
;
1348 if (isinf(bestScore
)) {
1349 ERROR("no viable spill candidates left\n");
1360 GCRA::checkInterference(const RIG_Node
*node
, Graph::EdgeIterator
& ei
)
1362 const RIG_Node
*intf
= RIG_Node::get(ei
);
1366 const LValue
*vA
= node
->getValue();
1367 const LValue
*vB
= intf
->getValue();
1369 const uint8_t intfMask
= ((1 << intf
->colors
) - 1) << (intf
->reg
& 7);
1371 if (vA
->compound
| vB
->compound
) {
1372 // NOTE: this only works for >aligned< register tuples !
1373 for (Value::DefCIterator D
= vA
->defs
.begin(); D
!= vA
->defs
.end(); ++D
) {
1374 for (Value::DefCIterator d
= vB
->defs
.begin(); d
!= vB
->defs
.end(); ++d
) {
1375 const LValue
*vD
= (*D
)->get()->asLValue();
1376 const LValue
*vd
= (*d
)->get()->asLValue();
1378 if (!vD
->livei
.overlaps(vd
->livei
)) {
1379 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "(%%%i) X (%%%i): no overlap\n",
1384 uint8_t mask
= vD
->compound
? vD
->compMask
: ~0;
1386 assert(vB
->compound
);
1387 mask
&= vd
->compMask
& vB
->compMask
;
1392 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
,
1393 "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n",
1395 vD
->compound
? vD
->compMask
: 0xff,
1397 vd
->compound
? vd
->compMask
: intfMask
,
1398 vB
->compMask
, intf
->reg
& ~7, mask
);
1400 regs
.occupyMask(node
->f
, intf
->reg
& ~7, mask
);
1404 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
,
1405 "(%%%i) X (%%%i): $r%i + %u\n",
1406 vA
->id
, vB
->id
, intf
->reg
, intf
->colors
);
1407 regs
.occupy(node
->f
, intf
->reg
, intf
->colors
);
1412 GCRA::selectRegisters()
1414 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "\nSELECT phase\n");
1416 while (!stack
.empty()) {
1417 RIG_Node
*node
= &nodes
[stack
.top()];
1420 regs
.reset(node
->f
);
1422 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "\nNODE[%%%i, %u colors]\n",
1423 node
->getValue()->id
, node
->colors
);
1425 for (Graph::EdgeIterator ei
= node
->outgoing(); !ei
.end(); ei
.next())
1426 checkInterference(node
, ei
);
1427 for (Graph::EdgeIterator ei
= node
->incident(); !ei
.end(); ei
.next())
1428 checkInterference(node
, ei
);
1430 if (!node
->prefRegs
.empty()) {
1431 for (std::list
<RIG_Node
*>::const_iterator it
= node
->prefRegs
.begin();
1432 it
!= node
->prefRegs
.end();
1434 if ((*it
)->reg
>= 0 &&
1435 regs
.testOccupy(node
->f
, (*it
)->reg
, node
->colors
)) {
1436 node
->reg
= (*it
)->reg
;
1443 LValue
*lval
= node
->getValue();
1444 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
)
1445 regs
.print(node
->f
);
1446 bool ret
= regs
.assign(node
->reg
, node
->f
, node
->colors
, node
->maxReg
);
1448 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "assigned reg %i\n", node
->reg
);
1449 lval
->compMask
= node
->getCompMask();
1451 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "must spill: %%%i (size %u)\n",
1452 lval
->id
, lval
->reg
.size
);
1453 Symbol
*slot
= NULL
;
1454 if (lval
->reg
.file
== FILE_GPR
)
1455 slot
= spill
.assignSlot(node
->livei
, lval
->reg
.size
);
1456 mustSpill
.push_back(ValuePair(lval
, slot
));
1459 if (!mustSpill
.empty())
1461 for (unsigned int i
= 0; i
< nodeCount
; ++i
) {
1462 LValue
*lval
= nodes
[i
].getValue();
1463 if (nodes
[i
].reg
>= 0 && nodes
[i
].colors
> 0)
1465 regs
.unitsToId(nodes
[i
].f
, nodes
[i
].reg
, lval
->reg
.size
);
1471 GCRA::allocateRegisters(ArrayList
& insns
)
1475 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
,
1476 "allocateRegisters to %u instructions\n", insns
.getSize());
1478 nodeCount
= func
->allLValues
.getSize();
1479 nodes
= new RIG_Node
[nodeCount
];
1482 for (unsigned int i
= 0; i
< nodeCount
; ++i
) {
1483 LValue
*lval
= reinterpret_cast<LValue
*>(func
->allLValues
.get(i
));
1485 nodes
[i
].init(regs
, lval
);
1486 RIG
.insert(&nodes
[i
]);
1488 if (lval
->inFile(FILE_GPR
) && lval
->getInsn() != NULL
) {
1489 Instruction
*insn
= lval
->getInsn();
1490 if (insn
->op
!= OP_MAD
&& insn
->op
!= OP_FMA
&& insn
->op
!= OP_SAD
)
1492 // For both of the cases below, we only want to add the preference
1493 // if all arguments are in registers.
1494 if (insn
->src(0).getFile() != FILE_GPR
||
1495 insn
->src(1).getFile() != FILE_GPR
||
1496 insn
->src(2).getFile() != FILE_GPR
)
1498 if (prog
->getTarget()->getChipset() < 0xc0) {
1499 // Outputting a flag is not supported with short encodings nor
1500 // with immediate arguments.
1501 // See handleMADforNV50.
1502 if (insn
->flagsDef
>= 0)
1505 // We can only fold immediate arguments if dst == src2. This
1506 // only matters if one of the first two arguments is an
1507 // immediate. This form is also only supported for floats.
1508 // See handleMADforNVC0.
1510 if (insn
->dType
!= TYPE_F32
)
1512 if (!insn
->src(0).getImmediate(imm
) &&
1513 !insn
->src(1).getImmediate(imm
))
1517 nodes
[i
].addRegPreference(getNode(insn
->getSrc(2)->asLValue()));
1522 // coalesce first, we use only 1 RIG node for a group of joined values
1523 ret
= coalesce(insns
);
1527 if (func
->getProgram()->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
)
1528 func
->printLiveIntervals();
1531 calculateSpillWeights();
1536 ret
= selectRegisters();
1538 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
,
1539 "selectRegisters failed, inserting spill code ...\n");
1540 regs
.reset(FILE_GPR
, true);
1541 spill
.run(mustSpill
);
1542 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
)
1545 prog
->maxGPR
= std::max(prog
->maxGPR
, regs
.getMaxAssigned(FILE_GPR
));
1554 GCRA::cleanup(const bool success
)
1558 for (ArrayList::Iterator it
= func
->allLValues
.iterator();
1559 !it
.end(); it
.next()) {
1560 LValue
*lval
= reinterpret_cast<LValue
*>(it
.get());
1562 lval
->livei
.clear();
1567 if (lval
->join
== lval
)
1571 lval
->reg
.data
.id
= lval
->join
->reg
.data
.id
;
1573 for (Value::DefIterator d
= lval
->defs
.begin(); d
!= lval
->defs
.end();
1575 lval
->join
->defs
.remove(*d
);
1581 resolveSplitsAndMerges();
1582 splits
.clear(); // avoid duplicate entries on next coalesce pass
1587 hi
.next
= hi
.prev
= &hi
;
1588 lo
[0].next
= lo
[0].prev
= &lo
[0];
1589 lo
[1].next
= lo
[1].prev
= &lo
[1];
1593 SpillCodeInserter::assignSlot(const Interval
&livei
, const unsigned int size
)
1596 int32_t offsetBase
= stackSize
;
1598 std::list
<SpillSlot
>::iterator pos
= slots
.end(), it
= slots
.begin();
1600 if (offsetBase
% size
)
1601 offsetBase
+= size
- (offsetBase
% size
);
1605 for (offset
= offsetBase
; offset
< stackSize
; offset
+= size
) {
1606 const int32_t entryEnd
= offset
+ size
;
1607 while (it
!= slots
.end() && it
->offset
< offset
)
1609 if (it
== slots
.end()) // no slots left
1611 std::list
<SpillSlot
>::iterator bgn
= it
;
1613 while (it
!= slots
.end() && it
->offset
< entryEnd
) {
1615 if (it
->occup
.overlaps(livei
))
1619 if (it
== slots
.end() || it
->offset
>= entryEnd
) {
1621 for (; bgn
!= slots
.end() && bgn
->offset
< entryEnd
; ++bgn
) {
1622 bgn
->occup
.insert(livei
);
1623 if (bgn
->size() == size
)
1624 slot
.sym
= bgn
->sym
;
1630 stackSize
= offset
+ size
;
1631 slot
.offset
= offset
;
1632 slot
.sym
= new_Symbol(func
->getProgram(), FILE_MEMORY_LOCAL
);
1633 if (!func
->stackPtr
)
1634 offset
+= func
->tlsBase
;
1635 slot
.sym
->setAddress(NULL
, offset
);
1636 slot
.sym
->reg
.size
= size
;
1637 slots
.insert(pos
, slot
)->occup
.insert(livei
);
1643 SpillCodeInserter::offsetSlot(Value
*base
, const LValue
*lval
)
1645 if (!lval
->compound
|| (lval
->compMask
& 0x1))
1647 Value
*slot
= cloneShallow(func
, base
);
1649 slot
->reg
.data
.offset
+= (ffs(lval
->compMask
) - 1) * lval
->reg
.size
;
1650 slot
->reg
.size
= lval
->reg
.size
;
1656 SpillCodeInserter::spill(Instruction
*defi
, Value
*slot
, LValue
*lval
)
1658 const DataType ty
= typeOfSize(lval
->reg
.size
);
1660 slot
= offsetSlot(slot
, lval
);
1663 if (slot
->reg
.file
== FILE_MEMORY_LOCAL
) {
1665 if (ty
!= TYPE_B96
) {
1666 st
= new_Instruction(func
, OP_STORE
, ty
);
1667 st
->setSrc(0, slot
);
1668 st
->setSrc(1, lval
);
1670 st
= new_Instruction(func
, OP_SPLIT
, ty
);
1671 st
->setSrc(0, lval
);
1672 for (int d
= 0; d
< lval
->reg
.size
/ 4; ++d
)
1673 st
->setDef(d
, new_LValue(func
, FILE_GPR
));
1675 for (int d
= lval
->reg
.size
/ 4 - 1; d
>= 0; --d
) {
1676 Value
*tmp
= cloneShallow(func
, slot
);
1678 tmp
->reg
.data
.offset
+= 4 * d
;
1680 Instruction
*s
= new_Instruction(func
, OP_STORE
, TYPE_U32
);
1682 s
->setSrc(1, st
->getDef(d
));
1683 defi
->bb
->insertAfter(defi
, s
);
1687 st
= new_Instruction(func
, OP_CVT
, ty
);
1688 st
->setDef(0, slot
);
1689 st
->setSrc(0, lval
);
1690 if (lval
->reg
.file
== FILE_FLAGS
)
1693 defi
->bb
->insertAfter(defi
, st
);
1697 SpillCodeInserter::unspill(Instruction
*usei
, LValue
*lval
, Value
*slot
)
1699 const DataType ty
= typeOfSize(lval
->reg
.size
);
1701 slot
= offsetSlot(slot
, lval
);
1702 lval
= cloneShallow(func
, lval
);
1705 if (slot
->reg
.file
== FILE_MEMORY_LOCAL
) {
1707 if (ty
!= TYPE_B96
) {
1708 ld
= new_Instruction(func
, OP_LOAD
, ty
);
1710 ld
= new_Instruction(func
, OP_MERGE
, ty
);
1711 for (int d
= 0; d
< lval
->reg
.size
/ 4; ++d
) {
1712 Value
*tmp
= cloneShallow(func
, slot
);
1715 tmp
->reg
.data
.offset
+= 4 * d
;
1717 Instruction
*l
= new_Instruction(func
, OP_LOAD
, TYPE_U32
);
1718 l
->setDef(0, (val
= new_LValue(func
, FILE_GPR
)));
1720 usei
->bb
->insertBefore(usei
, l
);
1724 ld
->setDef(0, lval
);
1725 usei
->bb
->insertBefore(usei
, ld
);
1729 ld
= new_Instruction(func
, OP_CVT
, ty
);
1731 ld
->setDef(0, lval
);
1732 ld
->setSrc(0, slot
);
1733 if (lval
->reg
.file
== FILE_FLAGS
)
1736 usei
->bb
->insertBefore(usei
, ld
);
1741 value_cmp(ValueRef
*a
, ValueRef
*b
) {
1742 Instruction
*ai
= a
->getInsn(), *bi
= b
->getInsn();
1743 if (ai
->bb
!= bi
->bb
)
1744 return ai
->bb
->getId() < bi
->bb
->getId();
1745 return ai
->serial
< bi
->serial
;
1748 // For each value that is to be spilled, go through all its definitions.
1749 // A value can have multiple definitions if it has been coalesced before.
1750 // For each definition, first go through all its uses and insert an unspill
1751 // instruction before it, then replace the use with the temporary register.
1752 // Unspill can be either a load from memory or simply a move to another
1754 // For "Pseudo" instructions (like PHI, SPLIT, MERGE) we can erase the use
1755 // if we have spilled to a memory location, or simply with the new register.
1756 // No load or conversion instruction should be needed.
1758 SpillCodeInserter::run(const std::list
<ValuePair
>& lst
)
1760 for (std::list
<ValuePair
>::const_iterator it
= lst
.begin(); it
!= lst
.end();
1762 LValue
*lval
= it
->first
->asLValue();
1763 Symbol
*mem
= it
->second
? it
->second
->asSym() : NULL
;
1765 // Keep track of which instructions to delete later. Deleting them
1766 // inside the loop is unsafe since a single instruction may have
1767 // multiple destinations that all need to be spilled (like OP_SPLIT).
1768 unordered_set
<Instruction
*> to_del
;
1770 for (Value::DefIterator d
= lval
->defs
.begin(); d
!= lval
->defs
.end();
1773 static_cast<Value
*>(mem
) : new_LValue(func
, FILE_GPR
);
1775 Instruction
*last
= NULL
;
1777 LValue
*dval
= (*d
)->get()->asLValue();
1778 Instruction
*defi
= (*d
)->getInsn();
1780 // Sort all the uses by BB/instruction so that we don't unspill
1781 // multiple times in a row, and also remove a source of
1783 std::vector
<ValueRef
*> refs(dval
->uses
.begin(), dval
->uses
.end());
1784 std::sort(refs
.begin(), refs
.end(), value_cmp
);
1786 // Unspill at each use *before* inserting spill instructions,
1787 // we don't want to have the spill instructions in the use list here.
1788 for (std::vector
<ValueRef
*>::const_iterator it
= refs
.begin();
1789 it
!= refs
.end(); ++it
) {
1791 Instruction
*usei
= u
->getInsn();
1793 if (usei
->isPseudo()) {
1794 tmp
= (slot
->reg
.file
== FILE_MEMORY_LOCAL
) ? NULL
: slot
;
1797 if (!last
|| (usei
!= last
->next
&& usei
!= last
))
1798 tmp
= unspill(usei
, dval
, slot
);
1805 if (defi
->isPseudo()) {
1806 d
= lval
->defs
.erase(d
);
1808 if (slot
->reg
.file
== FILE_MEMORY_LOCAL
)
1809 to_del
.insert(defi
);
1811 defi
->setDef(0, slot
);
1813 spill(defi
, slot
, dval
);
1817 for (unordered_set
<Instruction
*>::const_iterator it
= to_del
.begin();
1818 it
!= to_del
.end(); ++it
)
1819 delete_Instruction(func
->getProgram(), *it
);
1822 // TODO: We're not trying to reuse old slots in a potential next iteration.
1823 // We have to update the slots' livei intervals to be able to do that.
1824 stackBase
= stackSize
;
1832 for (IteratorRef it
= prog
->calls
.iteratorDFS(false);
1833 !it
->end(); it
->next()) {
1834 func
= Function::get(reinterpret_cast<Graph::Node
*>(it
->get()));
1836 func
->tlsBase
= prog
->tlsSize
;
1839 prog
->tlsSize
+= func
->tlsSize
;
1845 RegAlloc::execFunc()
1847 InsertConstraintsPass insertConstr
;
1848 PhiMovesPass insertPhiMoves
;
1849 ArgumentMovesPass insertArgMoves
;
1850 BuildIntervalsPass buildIntervals
;
1851 SpillCodeInserter
insertSpills(func
);
1853 GCRA
gcra(func
, insertSpills
);
1855 unsigned int i
, retries
;
1858 if (!func
->ins
.empty()) {
1859 // Insert a nop at the entry so inputs only used by the first instruction
1860 // don't count as having an empty live range.
1861 Instruction
*nop
= new_Instruction(func
, OP_NOP
, TYPE_NONE
);
1862 BasicBlock::get(func
->cfg
.getRoot())->insertHead(nop
);
1865 ret
= insertConstr
.exec(func
);
1869 ret
= insertPhiMoves
.run(func
);
1873 ret
= insertArgMoves
.run(func
);
1877 // TODO: need to fix up spill slot usage ranges to support > 1 retry
1878 for (retries
= 0; retries
< 3; ++retries
) {
1879 if (retries
&& (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
))
1880 INFO("Retry: %i\n", retries
);
1881 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
)
1884 // spilling to registers may add live ranges, need to rebuild everything
1886 for (sequence
= func
->cfg
.nextSequence(), i
= 0;
1887 ret
&& i
<= func
->loopNestingBound
;
1888 sequence
= func
->cfg
.nextSequence(), ++i
)
1889 ret
= buildLiveSets(BasicBlock::get(func
->cfg
.getRoot()));
1891 for (ArrayList::Iterator bi
= func
->allBBlocks
.iterator();
1892 !bi
.end(); bi
.next())
1893 BasicBlock::get(bi
)->liveSet
.marker
= false;
1896 func
->orderInstructions(this->insns
);
1898 ret
= buildIntervals
.run(func
);
1901 ret
= gcra
.allocateRegisters(insns
);
1905 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "RegAlloc done: %i\n", ret
);
1907 func
->tlsSize
= insertSpills
.getStackSize();
1912 // TODO: check if modifying Instruction::join here breaks anything
1914 GCRA::resolveSplitsAndMerges()
1916 for (std::list
<Instruction
*>::iterator it
= splits
.begin();
1919 Instruction
*split
= *it
;
1920 unsigned int reg
= regs
.idToBytes(split
->getSrc(0));
1921 for (int d
= 0; split
->defExists(d
); ++d
) {
1922 Value
*v
= split
->getDef(d
);
1923 v
->reg
.data
.id
= regs
.bytesToId(v
, reg
);
1930 for (std::list
<Instruction
*>::iterator it
= merges
.begin();
1933 Instruction
*merge
= *it
;
1934 unsigned int reg
= regs
.idToBytes(merge
->getDef(0));
1935 for (int s
= 0; merge
->srcExists(s
); ++s
) {
1936 Value
*v
= merge
->getSrc(s
);
1937 v
->reg
.data
.id
= regs
.bytesToId(v
, reg
);
1939 // If the value is defined by a phi/union node, we also need to
1940 // perform the same fixup on that node's sources, since after RA
1941 // their registers should be identical.
1942 if (v
->getInsn()->op
== OP_PHI
|| v
->getInsn()->op
== OP_UNION
) {
1943 Instruction
*phi
= v
->getInsn();
1944 for (int phis
= 0; phi
->srcExists(phis
); ++phis
) {
1945 phi
->getSrc(phis
)->join
= v
;
1946 phi
->getSrc(phis
)->reg
.data
.id
= v
->reg
.data
.id
;
1955 bool Program::registerAllocation()
1962 RegAlloc::InsertConstraintsPass::exec(Function
*ir
)
1966 bool ret
= run(ir
, true, true);
1968 ret
= insertConstraintMoves();
1972 // TODO: make part of texture insn
1974 RegAlloc::InsertConstraintsPass::textureMask(TexInstruction
*tex
)
1980 for (d
= 0, k
= 0, c
= 0; c
< 4; ++c
) {
1981 if (!(tex
->tex
.mask
& (1 << c
)))
1983 if (tex
->getDef(k
)->refCount()) {
1985 def
[d
++] = tex
->getDef(k
);
1989 tex
->tex
.mask
= mask
;
1991 for (c
= 0; c
< d
; ++c
)
1992 tex
->setDef(c
, def
[c
]);
1994 tex
->setDef(c
, NULL
);
1998 RegAlloc::InsertConstraintsPass::detectConflict(Instruction
*cst
, int s
)
2000 Value
*v
= cst
->getSrc(s
);
2002 // current register allocation can't handle it if a value participates in
2003 // multiple constraints
2004 for (Value::UseIterator it
= v
->uses
.begin(); it
!= v
->uses
.end(); ++it
) {
2005 if (cst
!= (*it
)->getInsn())
2009 // can start at s + 1 because detectConflict is called on all sources
2010 for (int c
= s
+ 1; cst
->srcExists(c
); ++c
)
2011 if (v
== cst
->getSrc(c
))
2014 Instruction
*defi
= v
->getInsn();
2016 return (!defi
|| defi
->constrainedDefs());
2020 RegAlloc::InsertConstraintsPass::addConstraint(Instruction
*i
, int s
, int n
)
2025 // first, look for an existing identical constraint op
2026 for (std::list
<Instruction
*>::iterator it
= constrList
.begin();
2027 it
!= constrList
.end();
2030 if (!i
->bb
->dominatedBy(cst
->bb
))
2032 for (d
= 0; d
< n
; ++d
)
2033 if (cst
->getSrc(d
) != i
->getSrc(d
+ s
))
2036 for (d
= 0; d
< n
; ++d
, ++s
)
2037 i
->setSrc(s
, cst
->getDef(d
));
2041 cst
= new_Instruction(func
, OP_CONSTRAINT
, i
->dType
);
2043 for (d
= 0; d
< n
; ++s
, ++d
) {
2044 cst
->setDef(d
, new_LValue(func
, FILE_GPR
));
2045 cst
->setSrc(d
, i
->getSrc(s
));
2046 i
->setSrc(s
, cst
->getDef(d
));
2048 i
->bb
->insertBefore(i
, cst
);
2050 constrList
.push_back(cst
);
2053 // Add a dummy use of the pointer source of >= 8 byte loads after the load
2054 // to prevent it from being assigned a register which overlapping the load's
2055 // destination, which would produce random corruptions.
2057 RegAlloc::InsertConstraintsPass::addHazard(Instruction
*i
, const ValueRef
*src
)
2059 Instruction
*hzd
= new_Instruction(func
, OP_NOP
, TYPE_NONE
);
2060 hzd
->setSrc(0, src
->get());
2061 i
->bb
->insertAfter(i
, hzd
);
2065 // b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q
2067 RegAlloc::InsertConstraintsPass::condenseDefs(Instruction
*insn
)
2070 for (n
= 0; insn
->defExists(n
) && insn
->def(n
).getFile() == FILE_GPR
; ++n
);
2071 condenseDefs(insn
, 0, n
- 1);
2075 RegAlloc::InsertConstraintsPass::condenseDefs(Instruction
*insn
,
2076 const int a
, const int b
)
2081 for (int s
= a
; s
<= b
; ++s
)
2082 size
+= insn
->getDef(s
)->reg
.size
;
2086 LValue
*lval
= new_LValue(func
, FILE_GPR
);
2087 lval
->reg
.size
= size
;
2089 Instruction
*split
= new_Instruction(func
, OP_SPLIT
, typeOfSize(size
));
2090 split
->setSrc(0, lval
);
2091 for (int d
= a
; d
<= b
; ++d
) {
2092 split
->setDef(d
- a
, insn
->getDef(d
));
2093 insn
->setDef(d
, NULL
);
2095 insn
->setDef(a
, lval
);
2097 for (int k
= a
+ 1, d
= b
+ 1; insn
->defExists(d
); ++d
, ++k
) {
2098 insn
->setDef(k
, insn
->getDef(d
));
2099 insn
->setDef(d
, NULL
);
2101 // carry over predicate if any (mainly for OP_UNION uses)
2102 split
->setPredicate(insn
->cc
, insn
->getPredicate());
2104 insn
->bb
->insertAfter(insn
, split
);
2105 constrList
.push_back(split
);
2109 RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction
*insn
,
2110 const int a
, const int b
)
2115 for (int s
= a
; s
<= b
; ++s
)
2116 size
+= insn
->getSrc(s
)->reg
.size
;
2119 LValue
*lval
= new_LValue(func
, FILE_GPR
);
2120 lval
->reg
.size
= size
;
2123 insn
->takeExtraSources(0, save
);
2125 Instruction
*merge
= new_Instruction(func
, OP_MERGE
, typeOfSize(size
));
2126 merge
->setDef(0, lval
);
2127 for (int s
= a
, i
= 0; s
<= b
; ++s
, ++i
) {
2128 merge
->setSrc(i
, insn
->getSrc(s
));
2130 insn
->moveSources(b
+ 1, a
- b
);
2131 insn
->setSrc(a
, lval
);
2132 insn
->bb
->insertBefore(insn
, merge
);
2134 insn
->putExtraSources(0, save
);
2136 constrList
.push_back(merge
);
2140 RegAlloc::InsertConstraintsPass::isScalarTexGM107(TexInstruction
*tex
)
2142 if (tex
->tex
.sIndirectSrc
>= 0 ||
2143 tex
->tex
.rIndirectSrc
>= 0 ||
2147 if (tex
->tex
.mask
== 5 || tex
->tex
.mask
== 6)
2186 // TLD4S: all 2D/RECT variants and only offset
2190 if (tex
->tex
.useOffsets
)
2193 switch (tex
->tex
.target
.getEnum()) {
2195 case TEX_TARGET_2D_ARRAY_SHADOW
:
2196 return tex
->tex
.levelZero
;
2197 case TEX_TARGET_CUBE
:
2198 return !tex
->tex
.levelZero
;
2200 case TEX_TARGET_2D_ARRAY
:
2201 case TEX_TARGET_2D_SHADOW
:
2203 case TEX_TARGET_RECT
:
2204 case TEX_TARGET_RECT_SHADOW
:
2211 if (tex
->tex
.useOffsets
)
2214 switch (tex
->tex
.target
.getEnum()) {
2216 case TEX_TARGET_2D_SHADOW
:
2217 case TEX_TARGET_RECT
:
2218 case TEX_TARGET_RECT_SHADOW
:
2219 case TEX_TARGET_CUBE
:
2226 switch (tex
->tex
.target
.getEnum()) {
2228 return !tex
->tex
.useOffsets
;
2230 case TEX_TARGET_RECT
:
2232 case TEX_TARGET_2D_ARRAY
:
2233 case TEX_TARGET_2D_MS
:
2235 return !tex
->tex
.useOffsets
&& tex
->tex
.levelZero
;
2241 if (tex
->tex
.useOffsets
> 1)
2243 if (tex
->tex
.mask
!= 0x3 && tex
->tex
.mask
!= 0xf)
2246 switch (tex
->tex
.target
.getEnum()) {
2248 case TEX_TARGET_2D_MS
:
2249 case TEX_TARGET_2D_SHADOW
:
2250 case TEX_TARGET_RECT
:
2251 case TEX_TARGET_RECT_SHADOW
:
2263 RegAlloc::InsertConstraintsPass::handleScalarTexGM107(TexInstruction
*tex
)
2265 int defCount
= tex
->defCount(0xff);
2266 int srcCount
= tex
->srcCount(0xff);
2268 tex
->tex
.scalar
= true;
2272 condenseDefs(tex
, 2, 3);
2274 condenseDefs(tex
, 0, 1);
2277 // special case for TXF.A2D
2278 if (tex
->op
== OP_TXF
&& tex
->tex
.target
== TEX_TARGET_2D_ARRAY
) {
2279 assert(srcCount
>= 3);
2280 condenseSrcs(tex
, 1, 2);
2283 condenseSrcs(tex
, 2, 3);
2284 // only if we have more than 2 sources
2286 condenseSrcs(tex
, 0, 1);
2289 assert(!tex
->defExists(2) && !tex
->srcExists(2));
2293 RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction
*tex
)
2297 if (isTextureOp(tex
->op
))
2300 if (isScalarTexGM107(tex
)) {
2301 handleScalarTexGM107(tex
);
2305 assert(!tex
->tex
.scalar
);
2308 if (isSurfaceOp(tex
->op
)) {
2309 int s
= tex
->tex
.target
.getDim() +
2310 (tex
->tex
.target
.isArray() || tex
->tex
.target
.isCube());
2320 if (tex
->subOp
== NV50_IR_SUBOP_ATOM_CAS
)
2328 condenseSrcs(tex
, 0, s
- 1);
2330 condenseSrcs(tex
, 1, n
); // do not condense the tex handle
2332 if (isTextureOp(tex
->op
)) {
2333 if (tex
->op
!= OP_TXQ
) {
2334 s
= tex
->tex
.target
.getArgCount() - tex
->tex
.target
.isMS();
2335 if (tex
->op
== OP_TXD
) {
2336 // Indirect handle belongs in the first arg
2337 if (tex
->tex
.rIndirectSrc
>= 0)
2339 if (!tex
->tex
.target
.isArray() && tex
->tex
.useOffsets
)
2342 n
= tex
->srcCount(0xff, true) - s
;
2343 // TODO: Is this necessary? Perhaps just has to be aligned to the
2344 // level that the first arg is, not necessarily to 4. This
2345 // requirement has not been rigorously verified, as it has been on
2347 if (n
> 0 && n
< 3) {
2348 if (tex
->srcExists(n
+ s
)) // move potential predicate out of the way
2349 tex
->moveSources(n
+ s
, 3 - n
);
2351 tex
->setSrc(s
+ n
++, new_LValue(func
, FILE_GPR
));
2354 s
= tex
->srcCount(0xff, true);
2359 condenseSrcs(tex
, 0, s
- 1);
2360 if (n
> 1) // NOTE: first call modified positions already
2361 condenseSrcs(tex
, 1, n
);
2366 RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction
*tex
)
2368 if (isTextureOp(tex
->op
))
2372 if (tex
->op
== OP_SUSTB
|| tex
->op
== OP_SUSTP
) {
2373 condenseSrcs(tex
, 3, 6);
2375 if (isTextureOp(tex
->op
)) {
2376 int n
= tex
->srcCount(0xff, true);
2377 int s
= n
> 4 ? 4 : n
;
2378 if (n
> 4 && n
< 7) {
2379 if (tex
->srcExists(n
)) // move potential predicate out of the way
2380 tex
->moveSources(n
, 7 - n
);
2383 tex
->setSrc(n
++, new_LValue(func
, FILE_GPR
));
2386 condenseSrcs(tex
, 0, s
- 1);
2388 condenseSrcs(tex
, 1, n
- s
);
2393 RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction
*tex
)
2397 if (isTextureOp(tex
->op
))
2400 if (tex
->op
== OP_TXQ
) {
2401 s
= tex
->srcCount(0xff);
2403 } else if (isSurfaceOp(tex
->op
)) {
2404 s
= tex
->tex
.target
.getDim() + (tex
->tex
.target
.isArray() || tex
->tex
.target
.isCube());
2405 if (tex
->op
== OP_SUSTB
|| tex
->op
== OP_SUSTP
)
2410 s
= tex
->tex
.target
.getArgCount() - tex
->tex
.target
.isMS();
2411 if (!tex
->tex
.target
.isArray() &&
2412 (tex
->tex
.rIndirectSrc
>= 0 || tex
->tex
.sIndirectSrc
>= 0))
2414 if (tex
->op
== OP_TXD
&& tex
->tex
.useOffsets
)
2416 n
= tex
->srcCount(0xff) - s
;
2421 condenseSrcs(tex
, 0, s
- 1);
2422 if (n
> 1) // NOTE: first call modified positions already
2423 condenseSrcs(tex
, 1, n
);
2429 RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction
*tex
)
2431 Value
*pred
= tex
->getPredicate();
2433 tex
->setPredicate(tex
->cc
, NULL
);
2437 assert(tex
->defExists(0) && tex
->srcExists(0));
2438 // make src and def count match
2440 for (c
= 0; tex
->srcExists(c
) || tex
->defExists(c
); ++c
) {
2441 if (!tex
->srcExists(c
))
2442 tex
->setSrc(c
, new_LValue(func
, tex
->getSrc(0)->asLValue()));
2444 insertConstraintMove(tex
, c
);
2445 if (!tex
->defExists(c
))
2446 tex
->setDef(c
, new_LValue(func
, tex
->getDef(0)->asLValue()));
2449 tex
->setPredicate(tex
->cc
, pred
);
2451 condenseSrcs(tex
, 0, c
- 1);
2454 // Insert constraint markers for instructions whose multiple sources must be
2455 // located in consecutive registers.
2457 RegAlloc::InsertConstraintsPass::visit(BasicBlock
*bb
)
2459 TexInstruction
*tex
;
2463 targ
= bb
->getProgram()->getTarget();
2465 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
2468 if ((tex
= i
->asTex())) {
2469 switch (targ
->getChipset() & ~0xf) {
2474 texConstraintNV50(tex
);
2478 texConstraintNVC0(tex
);
2483 texConstraintNVE0(tex
);
2488 texConstraintGM107(tex
);
2494 if (i
->op
== OP_EXPORT
|| i
->op
== OP_STORE
) {
2495 for (size
= typeSizeof(i
->dType
), s
= 1; size
> 0; ++s
) {
2496 assert(i
->srcExists(s
));
2497 size
-= i
->getSrc(s
)->reg
.size
;
2499 condenseSrcs(i
, 1, s
- 1);
2501 if (i
->op
== OP_LOAD
|| i
->op
== OP_VFETCH
) {
2503 if (i
->src(0).isIndirect(0) && typeSizeof(i
->dType
) >= 8)
2504 addHazard(i
, i
->src(0).getIndirect(0));
2505 if (i
->src(0).isIndirect(1) && typeSizeof(i
->dType
) >= 8)
2506 addHazard(i
, i
->src(0).getIndirect(1));
2508 if (i
->op
== OP_UNION
||
2509 i
->op
== OP_MERGE
||
2510 i
->op
== OP_SPLIT
) {
2511 constrList
.push_back(i
);
2518 RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction
*cst
, int s
)
2520 const uint8_t size
= cst
->src(s
).getSize();
2522 assert(cst
->getSrc(s
)->defs
.size() == 1); // still SSA
2524 Instruction
*defi
= cst
->getSrc(s
)->defs
.front()->getInsn();
2526 bool imm
= defi
->op
== OP_MOV
&&
2527 defi
->src(0).getFile() == FILE_IMMEDIATE
;
2528 bool load
= defi
->op
== OP_LOAD
&&
2529 defi
->src(0).getFile() == FILE_MEMORY_CONST
&&
2530 !defi
->src(0).isIndirect(0);
2531 // catch some cases where don't really need MOVs
2532 if (cst
->getSrc(s
)->refCount() == 1 && !defi
->constrainedDefs()) {
2534 // Move the defi right before the cst. No point in expanding
2536 defi
->bb
->remove(defi
);
2537 cst
->bb
->insertBefore(cst
, defi
);
2542 LValue
*lval
= new_LValue(func
, cst
->src(s
).getFile());
2543 lval
->reg
.size
= size
;
2545 Instruction
*mov
= new_Instruction(func
, OP_MOV
, typeOfSize(size
));
2546 mov
->setDef(0, lval
);
2547 mov
->setSrc(0, cst
->getSrc(s
));
2551 mov
->setSrc(0, defi
->getSrc(0));
2553 mov
->setSrc(0, defi
->getSrc(0));
2556 if (defi
->getPredicate())
2557 mov
->setPredicate(defi
->cc
, defi
->getPredicate());
2559 cst
->setSrc(s
, mov
->getDef(0));
2560 cst
->bb
->insertBefore(cst
, mov
);
2562 cst
->getDef(0)->asLValue()->noSpill
= 1; // doesn't help
2565 // Insert extra moves so that, if multiple register constraints on a value are
2566 // in conflict, these conflicts can be resolved.
2568 RegAlloc::InsertConstraintsPass::insertConstraintMoves()
2570 for (std::list
<Instruction
*>::iterator it
= constrList
.begin();
2571 it
!= constrList
.end();
2573 Instruction
*cst
= *it
;
2576 if (cst
->op
== OP_SPLIT
&& 0) {
2577 // spilling splits is annoying, just make sure they're separate
2578 for (int d
= 0; cst
->defExists(d
); ++d
) {
2579 if (!cst
->getDef(d
)->refCount())
2581 LValue
*lval
= new_LValue(func
, cst
->def(d
).getFile());
2582 const uint8_t size
= cst
->def(d
).getSize();
2583 lval
->reg
.size
= size
;
2585 mov
= new_Instruction(func
, OP_MOV
, typeOfSize(size
));
2586 mov
->setSrc(0, lval
);
2587 mov
->setDef(0, cst
->getDef(d
));
2588 cst
->setDef(d
, mov
->getSrc(0));
2589 cst
->bb
->insertAfter(cst
, mov
);
2591 cst
->getSrc(0)->asLValue()->noSpill
= 1;
2592 mov
->getSrc(0)->asLValue()->noSpill
= 1;
2595 if (cst
->op
== OP_MERGE
|| cst
->op
== OP_UNION
) {
2596 for (int s
= 0; cst
->srcExists(s
); ++s
) {
2597 const uint8_t size
= cst
->src(s
).getSize();
2599 if (!cst
->getSrc(s
)->defs
.size()) {
2600 mov
= new_Instruction(func
, OP_NOP
, typeOfSize(size
));
2601 mov
->setDef(0, cst
->getSrc(s
));
2602 cst
->bb
->insertBefore(cst
, mov
);
2606 insertConstraintMove(cst
, s
);
2614 } // namespace nv50_ir