/*
 * Copyright 2011 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24 #include "nv50_ir_target.h"
26 #include "nv50/nv50_debug.h"
30 #define MAX_REGISTER_FILE_SIZE 256
36 RegisterSet(const Target
*);
38 void init(const Target
*);
39 void reset(); // reset allocation status, but not max assigned regs
41 void periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
);
42 void intersect(DataFile f
, const RegisterSet
*);
44 bool assign(Value
**, int nr
);
45 void release(const Value
*);
46 void occupy(const Value
*);
48 int getMaxAssigned(DataFile f
) const { return fill
[f
]; }
53 uint32_t bits
[FILE_ADDRESS
+ 1][(MAX_REGISTER_FILE_SIZE
+ 31) / 32];
55 int unit
[FILE_ADDRESS
+ 1]; // log2 of allocation granularity
57 int last
[FILE_ADDRESS
+ 1];
58 int fill
[FILE_ADDRESS
+ 1];
64 memset(bits
, 0, sizeof(bits
));
67 RegisterSet::RegisterSet()
73 RegisterSet::init(const Target
*targ
)
75 for (unsigned int rf
= 0; rf
<= FILE_ADDRESS
; ++rf
) {
76 DataFile f
= static_cast<DataFile
>(rf
);
77 last
[rf
] = targ
->getFileSize(f
) - 1;
78 unit
[rf
] = targ
->getFileUnit(f
);
80 assert(last
[rf
] < MAX_REGISTER_FILE_SIZE
);
84 RegisterSet::RegisterSet(const Target
*targ
)
91 RegisterSet::periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
)
93 for (int i
= 0; i
< (last
[f
] + 31) / 32; ++i
)
94 bits
[f
][i
] = (bits
[f
][i
] | lock
) & ~unlock
;
98 RegisterSet::intersect(DataFile f
, const RegisterSet
*set
)
100 for (int i
= 0; i
< (last
[f
] + 31) / 32; ++i
)
101 bits
[f
][i
] |= set
->bits
[f
][i
];
105 RegisterSet::print() const
108 for (int i
= 0; i
< (last
[FILE_GPR
] + 31) / 32; ++i
)
109 INFO(" %08x", bits
[FILE_GPR
][i
]);
114 RegisterSet::assign(Value
**def
, int nr
)
116 DataFile f
= def
[0]->reg
.file
;
120 int s
= (n
* def
[0]->reg
.size
) >> unit
[f
];
121 uint32_t m
= (1 << s
) - 1;
123 int id
= last
[f
] + 1;
126 for (i
= 0; (i
* 32) < last
[f
]; ++i
) {
127 if (bits
[f
][i
] == 0xffffffff)
130 for (id
= 0; id
< 32; id
+= s
)
131 if (!(bits
[f
][i
] & (m
<< id
)))
140 bits
[f
][id
/ 32] |= m
<< (id
% 32);
142 if (id
+ (s
- 1) > fill
[f
])
143 fill
[f
] = id
+ (s
- 1);
145 for (i
= 0; i
< nr
; ++i
, ++id
)
146 if (!def
[i
]->livei
.isEmpty()) // XXX: really increased id if empty ?
147 def
[i
]->reg
.data
.id
= id
;
152 RegisterSet::occupy(const Value
*val
)
154 int id
= val
->reg
.data
.id
;
157 unsigned int f
= val
->reg
.file
;
159 uint32_t m
= (1 << (val
->reg
.size
>> unit
[f
])) - 1;
161 INFO_DBG(0, REG_ALLOC
, "reg occupy: %u[%i] %x\n", f
, id
, m
);
163 bits
[f
][id
/ 32] |= m
<< (id
% 32);
170 RegisterSet::release(const Value
*val
)
172 int id
= val
->reg
.data
.id
;
175 unsigned int f
= val
->reg
.file
;
177 uint32_t m
= (1 << (val
->reg
.size
>> unit
[f
])) - 1;
179 INFO_DBG(0, REG_ALLOC
, "reg release: %u[%i] %x\n", f
, id
, m
);
181 bits
[f
][id
/ 32] &= ~(m
<< (id
% 32));
184 #define JOIN_MASK_PHI (1 << 0)
185 #define JOIN_MASK_UNION (1 << 1)
186 #define JOIN_MASK_MOV (1 << 2)
187 #define JOIN_MASK_TEX (1 << 3)
188 #define JOIN_MASK_CONSTRAINT (1 << 4)
193 RegAlloc(Program
*program
) : prog(program
), sequence(0) { }
199 bool coalesceValues(unsigned int mask
);
201 bool allocateConstrainedValues();
204 class PhiMovesPass
: public Pass
{
206 virtual bool visit(BasicBlock
*);
207 inline bool needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
);
210 class BuildIntervalsPass
: public Pass
{
212 virtual bool visit(BasicBlock
*);
213 void collectLiveValues(BasicBlock
*);
214 void addLiveRange(Value
*, const BasicBlock
*, int end
);
217 class InsertConstraintsPass
: public Pass
{
219 bool exec(Function
*func
);
221 virtual bool visit(BasicBlock
*);
223 bool insertConstraintMoves();
225 void addHazard(Instruction
*i
, const ValueRef
*src
);
226 void textureMask(TexInstruction
*);
227 void addConstraint(Instruction
*, int s
, int n
);
228 bool detectConflict(Instruction
*, int s
);
233 bool buildLiveSets(BasicBlock
*);
234 void collectLValues(DLList
&, bool assignedOnly
);
236 void insertOrderedTail(DLList
&, Value
*);
237 inline Instruction
*insnBySerial(int);
243 // instructions in control flow / chronological order
246 int sequence
; // for manual passes through CFG
250 RegAlloc::insnBySerial(int serial
)
252 return reinterpret_cast<Instruction
*>(insns
.get(serial
));
256 RegAlloc::BuildIntervalsPass::addLiveRange(Value
*val
,
257 const BasicBlock
*bb
,
260 Instruction
*insn
= val
->getUniqueInsn();
264 assert(bb
->getFirst()->serial
<= bb
->getExit()->serial
);
265 assert(bb
->getExit()->serial
+ 1 >= end
);
267 int begin
= insn
->serial
;
268 if (begin
< bb
->getEntry()->serial
|| begin
> bb
->getExit()->serial
)
269 begin
= bb
->getEntry()->serial
;
271 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "%%%i <- live range [%i(%i), %i)\n",
272 val
->id
, begin
, insn
->serial
, end
);
274 if (begin
!= end
) // empty ranges are only added as hazards for fixed regs
275 val
->livei
.extend(begin
, end
);
279 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
)
281 if (b
->cfg
.incidentCount() <= 1)
285 for (Graph::EdgeIterator ei
= p
->cfg
.outgoing(); !ei
.end(); ei
.next())
286 if (ei
.getType() == Graph::Edge::TREE
||
287 ei
.getType() == Graph::Edge::FORWARD
)
292 // For each operand of each PHI in b, generate a new value by inserting a MOV
293 // at the end of the block it is coming from and replace the operand with its
294 // result. This eliminates liveness conflicts and enables us to let values be
295 // copied to the right register if such a conflict exists nonetheless.
297 // These MOVs are also crucial in making sure the live intervals of phi srces
298 // are extended until the end of the loop, since they are not included in the
301 RegAlloc::PhiMovesPass::visit(BasicBlock
*bb
)
303 Instruction
*phi
, *mov
;
306 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
307 pb
= pn
= BasicBlock::get(ei
.getNode());
310 if (needNewElseBlock(bb
, pb
)) {
311 pn
= new BasicBlock(func
);
313 // deletes an edge, iterator is invalid after this:
314 pb
->cfg
.detach(&bb
->cfg
);
315 pb
->cfg
.attach(&pn
->cfg
, Graph::Edge::TREE
);
316 pn
->cfg
.attach(&bb
->cfg
, Graph::Edge::FORWARD
); // XXX: check order !
318 assert(pb
->getExit()->op
!= OP_CALL
);
319 if (pb
->getExit()->asFlow()->target
.bb
== bb
)
320 pb
->getExit()->asFlow()->target
.bb
= pn
;
325 // insert MOVs (phi->src[j] should stem from j-th in-BB)
327 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
328 pb
= BasicBlock::get(ei
.getNode());
329 if (!pb
->isTerminated())
330 pb
->insertTail(new_FlowInstruction(func
, OP_BRA
, bb
));
332 for (phi
= bb
->getPhi(); phi
&& phi
->op
== OP_PHI
; phi
= phi
->next
) {
333 mov
= new_Instruction(func
, OP_MOV
, TYPE_U32
);
335 mov
->setSrc(0, phi
->getSrc(j
));
336 mov
->setDef(0, new_LValue(func
, phi
->getDef(0)->asLValue()));
337 phi
->setSrc(j
, mov
->getDef(0));
339 pb
->insertBefore(pb
->getExit(), mov
);
347 // Build the set of live-in variables of bb.
349 RegAlloc::buildLiveSets(BasicBlock
*bb
)
355 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "buildLiveSets(BB:%i)\n", bb
->getId());
357 bb
->liveSet
.allocate(func
->allLValues
.getSize(), false);
360 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
361 bn
= BasicBlock::get(ei
.getNode());
364 if (bn
->cfg
.visit(sequence
))
365 if (!buildLiveSets(bn
))
368 bb
->liveSet
= bn
->liveSet
;
370 bb
->liveSet
|= bn
->liveSet
;
372 if (!n
&& !bb
->liveSet
.marker
)
374 bb
->liveSet
.marker
= true;
376 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
377 INFO("BB:%i live set of out blocks:\n", bb
->getId());
381 // if (!bb->getEntry())
384 for (i
= bb
->getExit(); i
&& i
!= bb
->getEntry()->prev
; i
= i
->prev
) {
385 for (d
= 0; i
->defExists(d
); ++d
)
386 bb
->liveSet
.clr(i
->getDef(d
)->id
);
387 for (s
= 0; i
->srcExists(s
); ++s
)
388 if (i
->getSrc(s
)->asLValue())
389 bb
->liveSet
.set(i
->getSrc(s
)->id
);
391 for (i
= bb
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
)
392 bb
->liveSet
.clr(i
->getDef(0)->id
);
394 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
395 INFO("BB:%i live set after propagation:\n", bb
->getId());
403 RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock
*bb
)
405 BasicBlock
*bbA
= NULL
, *bbB
= NULL
;
407 assert(bb
->cfg
.incidentCount() || bb
->liveSet
.popCount() == 0);
409 if (bb
->cfg
.outgoingCount()) {
410 // trickery to save a loop of OR'ing liveSets
411 // aliasing works fine with BitSet::setOr
412 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
413 if (ei
.getType() == Graph::Edge::DUMMY
)
416 bb
->liveSet
.setOr(&bbA
->liveSet
, &bbB
->liveSet
);
421 bbB
= BasicBlock::get(ei
.getNode());
423 bb
->liveSet
.setOr(&bbB
->liveSet
, bbA
? &bbA
->liveSet
: NULL
);
425 if (bb
->cfg
.incidentCount()) {
431 RegAlloc::BuildIntervalsPass::visit(BasicBlock
*bb
)
433 collectLiveValues(bb
);
435 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "BuildIntervals(BB:%i)\n", bb
->getId());
437 // go through out blocks and delete phi sources that do not originate from
438 // the current block from the live set
439 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
440 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
442 for (Instruction
*i
= out
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
) {
443 bb
->liveSet
.clr(i
->getDef(0)->id
);
445 for (int s
= 0; s
< NV50_IR_MAX_SRCS
&& i
->src
[s
].exists(); ++s
) {
446 assert(i
->src
[s
].getInsn());
447 if (i
->getSrc(s
)->getUniqueInsn()->bb
== bb
) // XXX: reachableBy ?
448 bb
->liveSet
.set(i
->getSrc(s
)->id
);
450 bb
->liveSet
.clr(i
->getSrc(s
)->id
);
455 // remaining live-outs are live until end
457 for (unsigned int j
= 0; j
< bb
->liveSet
.getSize(); ++j
)
458 if (bb
->liveSet
.test(j
))
459 addLiveRange(func
->getLValue(j
), bb
, bb
->getExit()->serial
+ 1);
462 for (Instruction
*i
= bb
->getExit(); i
&& i
->op
!= OP_PHI
; i
= i
->prev
) {
463 for (int d
= 0; i
->defExists(d
); ++d
) {
464 bb
->liveSet
.clr(i
->getDef(d
)->id
);
465 if (i
->getDef(d
)->reg
.data
.id
>= 0) // add hazard for fixed regs
466 i
->getDef(d
)->livei
.extend(i
->serial
, i
->serial
);
469 for (int s
= 0; i
->srcExists(s
); ++s
) {
470 if (!i
->getSrc(s
)->asLValue())
472 if (!bb
->liveSet
.test(i
->getSrc(s
)->id
)) {
473 bb
->liveSet
.set(i
->getSrc(s
)->id
);
474 addLiveRange(i
->getSrc(s
), bb
, i
->serial
);
483 RegAlloc::coalesceValues(unsigned int mask
)
487 for (n
= 0; n
< insns
.getSize(); ++n
) {
489 Instruction
*insn
= insnBySerial(n
);
493 if (!(mask
& JOIN_MASK_PHI
))
495 for (c
= 0; insn
->srcExists(c
); ++c
)
496 if (!insn
->getDef(0)->coalesce(insn
->getSrc(c
), false)) {
497 ERROR("failed to coalesce phi operands\n");
502 if (!(mask
& JOIN_MASK_UNION
))
504 for (c
= 0; insn
->srcExists(c
); ++c
)
505 insn
->getDef(0)->coalesce(insn
->getSrc(c
), true);
508 if (!(mask
& JOIN_MASK_CONSTRAINT
))
510 for (c
= 0; c
< 4 && insn
->srcExists(c
); ++c
)
511 insn
->getDef(c
)->coalesce(insn
->getSrc(c
), true);
514 if (!(mask
& JOIN_MASK_MOV
))
516 i
= insn
->getSrc(0)->getUniqueInsn();
517 if (i
&& !i
->constrainedDefs())
518 insn
->getDef(0)->coalesce(insn
->getSrc(0), false);
528 if (!(mask
& JOIN_MASK_TEX
))
530 for (c
= 0; c
< 4 && insn
->srcExists(c
); ++c
)
531 insn
->getDef(c
)->coalesce(insn
->getSrc(c
), true);
541 RegAlloc::insertOrderedTail(DLList
&list
, Value
*val
)
543 // we insert the live intervals in order, so this should be short
544 DLList::Iterator iter
= list
.revIterator();
545 const int begin
= val
->livei
.begin();
546 for (; !iter
.end(); iter
.next()) {
547 if (reinterpret_cast<Value
*>(iter
.get())->livei
.begin() <= begin
)
554 checkList(DLList
&list
)
559 for (DLList::Iterator iter
= list
.iterator(); !iter
.end(); iter
.next()) {
560 next
= Value::get(iter
);
563 assert(prev
->livei
.begin() <= next
->livei
.begin());
565 assert(next
->join
== next
);
571 RegAlloc::collectLValues(DLList
&list
, bool assignedOnly
)
573 for (int n
= 0; n
< insns
.getSize(); ++n
) {
574 Instruction
*i
= insnBySerial(n
);
576 for (int d
= 0; i
->defExists(d
); ++d
)
577 if (!i
->getDef(d
)->livei
.isEmpty())
578 if (!assignedOnly
|| i
->getDef(d
)->reg
.data
.id
>= 0)
579 insertOrderedTail(list
, i
->getDef(d
));
585 RegAlloc::allocateConstrainedValues()
588 RegisterSet regSet
[4];
591 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "RA: allocating constrained values\n");
593 collectLValues(regVals
, true);
595 for (int c
= 0; c
< 4; ++c
)
596 regSet
[c
].init(prog
->getTarget());
598 for (int n
= 0; n
< insns
.getSize(); ++n
) {
599 Instruction
*i
= insnBySerial(n
);
601 const int vecSize
= i
->defCount(0xf);
604 assert(vecSize
<= 4);
606 for (int c
= 0; c
< vecSize
; ++c
)
607 defs
[c
] = i
->def
[c
].rep();
609 if (defs
[0]->reg
.data
.id
>= 0) {
610 for (int c
= 1; c
< vecSize
; ++c
) {
611 assert(defs
[c
]->reg
.data
.id
>= 0);
616 for (int c
= 0; c
< vecSize
; ++c
) {
620 for (DLList::Iterator it
= regVals
.iterator(); !it
.end(); it
.next()) {
621 Value
*rVal
= Value::get(it
);
622 if (rVal
->reg
.data
.id
>= 0 && rVal
->livei
.overlaps(defs
[c
]->livei
))
623 regSet
[c
].occupy(rVal
);
626 if (vecSize
== 2) // granularity is 2 instead of 4
627 mask
|= 0x11111111 << 2;
628 regSet
[c
].periodicMask(defs
[0]->reg
.file
, 0, ~(mask
<< c
));
630 if (!defs
[c
]->livei
.isEmpty())
631 insertOrderedTail(regVals
, defs
[c
]);
633 for (int c
= 1; c
< vecSize
; ++c
)
634 regSet
[0].intersect(defs
[0]->reg
.file
, ®Set
[c
]);
636 if (!regSet
[0].assign(&defs
[0], vecSize
)) // TODO: spilling
639 for (int c
= 0; c
< 4; c
+= 2)
640 if (regSet
[c
].getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
641 prog
->maxGPR
= regSet
[c
].getMaxAssigned(FILE_GPR
);
646 RegAlloc::linearScan()
649 DLList unhandled
, active
, inactive
;
650 RegisterSet
f(prog
->getTarget()), free(prog
->getTarget());
652 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "RA: linear scan\n");
654 collectLValues(unhandled
, false);
656 for (DLList::Iterator cI
= unhandled
.iterator(); !cI
.end();) {
657 cur
= Value::get(cI
);
660 for (DLList::Iterator aI
= active
.iterator(); !aI
.end();) {
661 val
= Value::get(aI
);
662 if (val
->livei
.end() <= cur
->livei
.begin()) {
666 if (!val
->livei
.contains(cur
->livei
.begin())) {
668 aI
.moveToList(inactive
);
674 for (DLList::Iterator iI
= inactive
.iterator(); !iI
.end();) {
675 val
= Value::get(iI
);
676 if (val
->livei
.end() <= cur
->livei
.begin()) {
679 if (val
->livei
.contains(cur
->livei
.begin())) {
681 iI
.moveToList(active
);
688 for (DLList::Iterator iI
= inactive
.iterator(); !iI
.end(); iI
.next()) {
689 val
= Value::get(iI
);
690 if (val
->livei
.overlaps(cur
->livei
))
694 for (DLList::Iterator uI
= unhandled
.iterator(); !uI
.end(); uI
.next()) {
695 val
= Value::get(uI
);
696 if (val
->reg
.data
.id
>= 0 && val
->livei
.overlaps(cur
->livei
))
700 if (cur
->reg
.data
.id
< 0) {
701 bool spill
= !f
.assign(&cur
, 1);
703 ERROR("out of registers of file %u\n", cur
->reg
.file
);
711 if (f
.getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
712 prog
->maxGPR
= f
.getMaxAssigned(FILE_GPR
);
713 if (free
.getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
714 prog
->maxGPR
= free
.getMaxAssigned(FILE_GPR
);
721 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
722 !fi
.end(); fi
.next()) {
723 func
= reinterpret_cast<Function
*>(fi
.get());
733 InsertConstraintsPass insertConstr
;
734 PhiMovesPass insertMoves
;
735 BuildIntervalsPass buildIntervals
;
740 ret
= insertConstr
.exec(func
);
744 ret
= insertMoves
.run(func
);
748 for (sequence
= func
->cfg
.nextSequence(), i
= 0;
749 ret
&& i
<= func
->loopNestingBound
;
750 sequence
= func
->cfg
.nextSequence(), ++i
)
751 ret
= buildLiveSets(BasicBlock::get(func
->cfg
.getRoot()));
755 func
->orderInstructions(this->insns
);
757 ret
= buildIntervals
.run(func
);
761 ret
= coalesceValues(JOIN_MASK_PHI
);
764 switch (prog
->getTarget()->getChipset() & 0xf0) {
766 ret
= coalesceValues(JOIN_MASK_UNION
| JOIN_MASK_TEX
);
769 ret
= coalesceValues(JOIN_MASK_UNION
| JOIN_MASK_CONSTRAINT
);
776 ret
= coalesceValues(JOIN_MASK_MOV
);
780 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
782 func
->printLiveIntervals();
785 ret
= allocateConstrainedValues() && linearScan();
790 // TODO: should probably call destructor on LValues later instead
791 for (ArrayList::Iterator it
= func
->allLValues
.iterator();
792 !it
.end(); it
.next())
793 reinterpret_cast<LValue
*>(it
.get())->livei
.clear();
798 bool Program::registerAllocation()
805 RegAlloc::InsertConstraintsPass::exec(Function
*ir
)
809 bool ret
= run(ir
, true, true);
811 ret
= insertConstraintMoves();
815 // TODO: make part of texture insn
817 RegAlloc::InsertConstraintsPass::textureMask(TexInstruction
*tex
)
823 for (d
= 0, k
= 0, c
= 0; c
< 4; ++c
) {
824 if (!(tex
->tex
.mask
& (1 << c
)))
826 if (tex
->getDef(k
)->refCount()) {
828 def
[d
++] = tex
->getDef(k
);
832 tex
->tex
.mask
= mask
;
834 #if 0 // reorder or set the unused ones NULL ?
835 for (c
= 0; c
< 4; ++c
)
836 if (!(tex
->tex
.mask
& (1 << c
)))
837 def
[d
++] = tex
->getDef(c
);
839 for (c
= 0; c
< d
; ++c
)
840 tex
->setDef(c
, def
[c
]);
843 tex
->setDef(c
, NULL
);
848 RegAlloc::InsertConstraintsPass::detectConflict(Instruction
*cst
, int s
)
850 // current register allocation can't handle it if a value participates in
851 // multiple constraints
852 for (ValueRef::Iterator it
= cst
->src
[s
].iterator(); !it
.end(); it
.next()) {
853 Instruction
*insn
= it
.get()->getInsn();
858 // can start at s + 1 because detectConflict is called on all sources
859 for (int c
= s
+ 1; cst
->srcExists(c
); ++c
)
860 if (cst
->getSrc(c
) == cst
->getSrc(s
))
863 Instruction
*defi
= cst
->getSrc(s
)->getInsn();
865 return (!defi
|| defi
->constrainedDefs());
869 RegAlloc::InsertConstraintsPass::addConstraint(Instruction
*i
, int s
, int n
)
874 // first, look for an existing identical constraint op
875 for (DLList::Iterator it
= constrList
.iterator(); !it
.end(); it
.next()) {
876 cst
= reinterpret_cast<Instruction
*>(it
.get());
877 if (!i
->bb
->dominatedBy(cst
->bb
))
879 for (d
= 0; d
< n
; ++d
)
880 if (cst
->getSrc(d
) != i
->getSrc(d
+ s
))
883 for (d
= 0; d
< n
; ++d
, ++s
)
884 i
->setSrc(s
, cst
->getDef(d
));
888 cst
= new_Instruction(func
, OP_CONSTRAINT
, i
->dType
);
890 for (d
= 0; d
< n
; ++s
, ++d
) {
891 cst
->setDef(d
, new_LValue(func
, FILE_GPR
));
892 cst
->setSrc(d
, i
->getSrc(s
));
893 i
->setSrc(s
, cst
->getDef(d
));
895 i
->bb
->insertBefore(i
, cst
);
897 constrList
.insert(cst
);
900 // Add a dummy use of the pointer source of >= 8 byte loads after the load
901 // to prevent it from being assigned a register which overlapping the load's
902 // destination, which would produce random corruptions.
904 RegAlloc::InsertConstraintsPass::addHazard(Instruction
*i
, const ValueRef
*src
)
906 Instruction
*hzd
= new_Instruction(func
, OP_NOP
, TYPE_NONE
);
907 hzd
->setSrc(0, src
->get());
908 i
->bb
->insertAfter(i
, hzd
);
912 // Insert constraint markers for instructions whose multiple sources must be
913 // located in consecutive registers.
915 RegAlloc::InsertConstraintsPass::visit(BasicBlock
*bb
)
921 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
924 if ((tex
= i
->asTex())) {
927 // FIXME: this is target specific
928 if (tex
->op
== OP_TXQ
) {
929 s
= tex
->srcCount(0xff);
932 s
= tex
->tex
.target
.getArgCount();
933 if (!tex
->tex
.target
.isArray() &&
934 (tex
->tex
.rIndirectSrc
>= 0 || tex
->tex
.sIndirectSrc
>= 0))
936 if (tex
->op
== OP_TXD
&& tex
->tex
.useOffsets
)
938 n
= tex
->srcCount(0xff) - s
;
943 addConstraint(i
, 0, s
);
945 addConstraint(i
, s
, n
);
947 if (i
->op
== OP_EXPORT
|| i
->op
== OP_STORE
) {
948 for (size
= typeSizeof(i
->dType
), s
= 1; size
> 0; ++s
) {
949 assert(i
->srcExists(s
));
950 size
-= i
->getSrc(s
)->reg
.size
;
953 addConstraint(i
, 1, s
- 1);
955 if (i
->op
== OP_LOAD
) {
956 if (i
->src
[0].isIndirect(0) && typeSizeof(i
->dType
) >= 8)
957 addHazard(i
, i
->src
[0].getIndirect(0));
963 // Insert extra moves so that, if multiple register constraints on a value are
964 // in conflict, these conflicts can be resolved.
966 RegAlloc::InsertConstraintsPass::insertConstraintMoves()
968 for (DLList::Iterator it
= constrList
.iterator(); !it
.end(); it
.next()) {
969 Instruction
*cst
= reinterpret_cast<Instruction
*>(it
.get());
971 for (int s
= 0; cst
->srcExists(s
); ++s
) {
972 if (!detectConflict(cst
, s
))
974 Instruction
*mov
= new_Instruction(func
, OP_MOV
,
975 typeOfSize(cst
->src
[s
].getSize()));
976 mov
->setSrc(0, cst
->getSrc(s
));
977 mov
->setDef(0, new_LValue(func
, FILE_GPR
));
978 cst
->setSrc(s
, mov
->getDef(0));
980 cst
->bb
->insertBefore(cst
, mov
);
986 } // namespace nv50_ir