3 #include "nv50_ir_target.h"
5 #include "nv50/nv50_debug.h"
// Number of bits reserved per register file in RegisterSet::bits (the array is
// sized as (MAX_REGISTER_FILE_SIZE + 31) / 32 words per file).
// RegisterSet::init() asserts that every file's last index is below this value.
9 #define MAX_REGISTER_FILE_SIZE 256
15 RegisterSet(const Target
*);
17 void init(const Target
*);
18 void reset(); // reset allocation status, but not max assigned regs
20 void periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
);
21 void intersect(DataFile f
, const RegisterSet
*);
23 bool assign(Value
**, int nr
);
24 void release(const Value
*);
25 void occupy(const Value
*);
27 int getMaxAssigned(DataFile f
) const { return fill
[f
]; }
32 uint32_t bits
[FILE_ADDRESS
+ 1][(MAX_REGISTER_FILE_SIZE
+ 31) / 32];
34 int unit
[FILE_ADDRESS
+ 1]; // log2 of allocation granularity
36 int last
[FILE_ADDRESS
+ 1];
37 int fill
[FILE_ADDRESS
+ 1];
43 memset(bits
, 0, sizeof(bits
));
46 RegisterSet::RegisterSet()
52 RegisterSet::init(const Target
*targ
)
54 for (unsigned int rf
= 0; rf
<= FILE_ADDRESS
; ++rf
) {
55 DataFile f
= static_cast<DataFile
>(rf
);
56 last
[rf
] = targ
->getFileSize(f
) - 1;
57 unit
[rf
] = targ
->getFileUnit(f
);
59 assert(last
[rf
] < MAX_REGISTER_FILE_SIZE
);
63 RegisterSet::RegisterSet(const Target
*targ
)
70 RegisterSet::periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
)
72 for (int i
= 0; i
< (last
[f
] + 31) / 32; ++i
)
73 bits
[f
][i
] = (bits
[f
][i
] | lock
) & ~unlock
;
77 RegisterSet::intersect(DataFile f
, const RegisterSet
*set
)
79 for (int i
= 0; i
< (last
[f
] + 31) / 32; ++i
)
80 bits
[f
][i
] |= set
->bits
[f
][i
];
84 RegisterSet::print() const
87 for (int i
= 0; i
< (last
[FILE_GPR
] + 31) / 32; ++i
)
88 INFO(" %08x", bits
[FILE_GPR
][i
]);
93 RegisterSet::assign(Value
**def
, int nr
)
95 DataFile f
= def
[0]->reg
.file
;
99 int s
= (n
* def
[0]->reg
.size
) >> unit
[f
];
100 uint32_t m
= (1 << s
) - 1;
102 int id
= last
[f
] + 1;
105 for (i
= 0; (i
* 32) < last
[f
]; ++i
) {
106 if (bits
[f
][i
] == 0xffffffff)
109 for (id
= 0; id
< 32; id
+= s
)
110 if (!(bits
[f
][i
] & (m
<< id
)))
119 bits
[f
][id
/ 32] |= m
<< (id
% 32);
121 if (id
+ (s
- 1) > fill
[f
])
122 fill
[f
] = id
+ (s
- 1);
124 for (i
= 0; i
< nr
; ++i
, ++id
)
125 if (!def
[i
]->livei
.isEmpty()) // XXX: really increased id if empty ?
126 def
[i
]->reg
.data
.id
= id
;
131 RegisterSet::occupy(const Value
*val
)
133 int id
= val
->reg
.data
.id
;
136 unsigned int f
= val
->reg
.file
;
138 uint32_t m
= (1 << (val
->reg
.size
>> unit
[f
])) - 1;
140 INFO_DBG(0, REG_ALLOC
, "reg occupy: %u[%i] %x\n", f
, id
, m
);
142 bits
[f
][id
/ 32] |= m
<< (id
% 32);
149 RegisterSet::release(const Value
*val
)
151 int id
= val
->reg
.data
.id
;
154 unsigned int f
= val
->reg
.file
;
156 uint32_t m
= (1 << (val
->reg
.size
>> unit
[f
])) - 1;
158 INFO_DBG(0, REG_ALLOC
, "reg release: %u[%i] %x\n", f
, id
, m
);
160 bits
[f
][id
/ 32] &= ~(m
<< (id
% 32));
// Bit flags for the `mask` argument of RegAlloc::coalesceValues(). That function
// tests each flag separately (`mask & JOIN_MASK_*`), and its callers in this file
// pass either a single flag or several OR'ed together.
163 #define JOIN_MASK_PHI (1 << 0)
164 #define JOIN_MASK_UNION (1 << 1)
165 #define JOIN_MASK_MOV (1 << 2)
166 #define JOIN_MASK_TEX (1 << 3)
167 #define JOIN_MASK_CONSTRAINT (1 << 4)
172 RegAlloc(Program
*program
) : prog(program
), sequence(0) { }
178 bool coalesceValues(unsigned int mask
);
180 bool allocateConstrainedValues();
183 class PhiMovesPass
: public Pass
{
185 virtual bool visit(BasicBlock
*);
186 inline bool needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
);
189 class BuildIntervalsPass
: public Pass
{
191 virtual bool visit(BasicBlock
*);
192 void collectLiveValues(BasicBlock
*);
193 void addLiveRange(Value
*, const BasicBlock
*, int end
);
196 class InsertConstraintsPass
: public Pass
{
198 bool exec(Function
*func
);
200 virtual bool visit(BasicBlock
*);
202 bool insertConstraintMoves();
204 void addHazard(Instruction
*i
, const ValueRef
*src
);
205 void textureMask(TexInstruction
*);
206 void addConstraint(Instruction
*, int s
, int n
);
207 bool detectConflict(Instruction
*, int s
);
212 bool buildLiveSets(BasicBlock
*);
213 void collectLValues(DLList
&, bool assignedOnly
);
215 void insertOrderedTail(DLList
&, Value
*);
216 inline Instruction
*insnBySerial(int);
222 // instructions in control flow / chronological order
225 int sequence
; // for manual passes through CFG
229 RegAlloc::insnBySerial(int serial
)
231 return reinterpret_cast<Instruction
*>(insns
.get(serial
));
235 RegAlloc::BuildIntervalsPass::addLiveRange(Value
*val
,
236 const BasicBlock
*bb
,
239 Instruction
*insn
= val
->getUniqueInsn();
243 assert(bb
->getFirst()->serial
<= bb
->getExit()->serial
);
244 assert(bb
->getExit()->serial
+ 1 >= end
);
246 int begin
= insn
->serial
;
247 if (begin
< bb
->getEntry()->serial
|| begin
> bb
->getExit()->serial
)
248 begin
= bb
->getEntry()->serial
;
250 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "%%%i <- live range [%i(%i), %i)\n",
251 val
->id
, begin
, insn
->serial
, end
);
253 if (begin
!= end
) // empty ranges are only added as hazards for fixed regs
254 val
->livei
.extend(begin
, end
);
258 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
)
260 if (b
->cfg
.incidentCount() <= 1)
264 for (Graph::EdgeIterator ei
= p
->cfg
.outgoing(); !ei
.end(); ei
.next())
265 if (ei
.getType() == Graph::Edge::TREE
||
266 ei
.getType() == Graph::Edge::FORWARD
)
271 // For each operand of each PHI in b, generate a new value by inserting a MOV
272 // at the end of the block it is coming from and replace the operand with its
273 // result. This eliminates liveness conflicts and enables us to let values be
274 // copied to the right register if such a conflict exists nonetheless.
276 // These MOVs are also crucial in making sure the live intervals of phi srces
277 // are extended until the end of the loop, since they are not included in the
280 RegAlloc::PhiMovesPass::visit(BasicBlock
*bb
)
282 Instruction
*phi
, *mov
;
285 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
286 pb
= pn
= BasicBlock::get(ei
.getNode());
289 if (needNewElseBlock(bb
, pb
)) {
290 pn
= new BasicBlock(func
);
292 // deletes an edge, iterator is invalid after this:
293 pb
->cfg
.detach(&bb
->cfg
);
294 pb
->cfg
.attach(&pn
->cfg
, Graph::Edge::TREE
);
295 pn
->cfg
.attach(&bb
->cfg
, Graph::Edge::FORWARD
); // XXX: check order !
297 assert(pb
->getExit()->op
!= OP_CALL
);
298 if (pb
->getExit()->asFlow()->target
.bb
== bb
)
299 pb
->getExit()->asFlow()->target
.bb
= pn
;
304 // insert MOVs (phi->src[j] should stem from j-th in-BB)
306 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
307 pb
= BasicBlock::get(ei
.getNode());
308 if (!pb
->isTerminated())
309 pb
->insertTail(new_FlowInstruction(func
, OP_BRA
, bb
));
311 for (phi
= bb
->getPhi(); phi
&& phi
->op
== OP_PHI
; phi
= phi
->next
) {
312 mov
= new_Instruction(func
, OP_MOV
, TYPE_U32
);
314 mov
->setSrc(0, phi
->getSrc(j
));
315 mov
->setDef(0, new_LValue(func
, phi
->getDef(0)->asLValue()));
316 phi
->setSrc(j
, mov
->getDef(0));
318 pb
->insertBefore(pb
->getExit(), mov
);
326 // Build the set of live-in variables of bb.
328 RegAlloc::buildLiveSets(BasicBlock
*bb
)
334 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "buildLiveSets(BB:%i)\n", bb
->getId());
336 bb
->liveSet
.allocate(func
->allLValues
.getSize(), false);
339 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
340 bn
= BasicBlock::get(ei
.getNode());
343 if (bn
->cfg
.visit(sequence
))
344 if (!buildLiveSets(bn
))
347 bb
->liveSet
= bn
->liveSet
;
349 bb
->liveSet
|= bn
->liveSet
;
351 if (!n
&& !bb
->liveSet
.marker
)
353 bb
->liveSet
.marker
= true;
355 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
356 INFO("BB:%i live set of out blocks:\n", bb
->getId());
360 // if (!bb->getEntry())
363 for (i
= bb
->getExit(); i
&& i
!= bb
->getEntry()->prev
; i
= i
->prev
) {
364 for (d
= 0; i
->defExists(d
); ++d
)
365 bb
->liveSet
.clr(i
->getDef(d
)->id
);
366 for (s
= 0; i
->srcExists(s
); ++s
)
367 if (i
->getSrc(s
)->asLValue())
368 bb
->liveSet
.set(i
->getSrc(s
)->id
);
370 for (i
= bb
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
)
371 bb
->liveSet
.clr(i
->getDef(0)->id
);
373 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
374 INFO("BB:%i live set after propagation:\n", bb
->getId());
382 RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock
*bb
)
384 BasicBlock
*bbA
= NULL
, *bbB
= NULL
;
386 assert(bb
->cfg
.incidentCount() || bb
->liveSet
.popCount() == 0);
388 if (bb
->cfg
.outgoingCount()) {
389 // trickery to save a loop of OR'ing liveSets
390 // aliasing works fine with BitSet::setOr
391 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
392 if (ei
.getType() == Graph::Edge::DUMMY
)
395 bb
->liveSet
.setOr(&bbA
->liveSet
, &bbB
->liveSet
);
400 bbB
= BasicBlock::get(ei
.getNode());
402 bb
->liveSet
.setOr(&bbB
->liveSet
, bbA
? &bbA
->liveSet
: NULL
);
404 if (bb
->cfg
.incidentCount()) {
410 RegAlloc::BuildIntervalsPass::visit(BasicBlock
*bb
)
412 collectLiveValues(bb
);
414 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "BuildIntervals(BB:%i)\n", bb
->getId());
416 // go through out blocks and delete phi sources that do not originate from
417 // the current block from the live set
418 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
419 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
421 for (Instruction
*i
= out
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
) {
422 bb
->liveSet
.clr(i
->getDef(0)->id
);
424 for (int s
= 0; s
< NV50_IR_MAX_SRCS
&& i
->src
[s
].exists(); ++s
) {
425 assert(i
->src
[s
].getInsn());
426 if (i
->getSrc(s
)->getUniqueInsn()->bb
== bb
) // XXX: reachableBy ?
427 bb
->liveSet
.set(i
->getSrc(s
)->id
);
429 bb
->liveSet
.clr(i
->getSrc(s
)->id
);
434 // remaining live-outs are live until end
436 for (unsigned int j
= 0; j
< bb
->liveSet
.getSize(); ++j
)
437 if (bb
->liveSet
.test(j
))
438 addLiveRange(func
->getLValue(j
), bb
, bb
->getExit()->serial
+ 1);
441 for (Instruction
*i
= bb
->getExit(); i
&& i
->op
!= OP_PHI
; i
= i
->prev
) {
442 for (int d
= 0; i
->defExists(d
); ++d
) {
443 bb
->liveSet
.clr(i
->getDef(d
)->id
);
444 if (i
->getDef(d
)->reg
.data
.id
>= 0) // add hazard for fixed regs
445 i
->getDef(d
)->livei
.extend(i
->serial
, i
->serial
);
448 for (int s
= 0; i
->srcExists(s
); ++s
) {
449 if (!i
->getSrc(s
)->asLValue())
451 if (!bb
->liveSet
.test(i
->getSrc(s
)->id
)) {
452 bb
->liveSet
.set(i
->getSrc(s
)->id
);
453 addLiveRange(i
->getSrc(s
), bb
, i
->serial
);
462 RegAlloc::coalesceValues(unsigned int mask
)
466 for (n
= 0; n
< insns
.getSize(); ++n
) {
468 Instruction
*insn
= insnBySerial(n
);
472 if (!(mask
& JOIN_MASK_PHI
))
474 for (c
= 0; insn
->srcExists(c
); ++c
)
475 if (!insn
->getDef(0)->coalesce(insn
->getSrc(c
), false)) {
476 ERROR("failed to coalesce phi operands\n");
481 if (!(mask
& JOIN_MASK_UNION
))
483 for (c
= 0; insn
->srcExists(c
); ++c
)
484 insn
->getDef(0)->coalesce(insn
->getSrc(c
), true);
487 if (!(mask
& JOIN_MASK_CONSTRAINT
))
489 for (c
= 0; c
< 4 && insn
->srcExists(c
); ++c
)
490 insn
->getDef(c
)->coalesce(insn
->getSrc(c
), true);
493 if (!(mask
& JOIN_MASK_MOV
))
495 i
= insn
->getSrc(0)->getUniqueInsn();
496 if (i
&& !i
->constrainedDefs())
497 insn
->getDef(0)->coalesce(insn
->getSrc(0), false);
507 if (!(mask
& JOIN_MASK_TEX
))
509 for (c
= 0; c
< 4 && insn
->srcExists(c
); ++c
)
510 insn
->getDef(c
)->coalesce(insn
->getSrc(c
), true);
520 RegAlloc::insertOrderedTail(DLList
&list
, Value
*val
)
522 // we insert the live intervals in order, so this should be short
523 DLList::Iterator iter
= list
.revIterator();
524 const int begin
= val
->livei
.begin();
525 for (; !iter
.end(); iter
.next()) {
526 if (reinterpret_cast<Value
*>(iter
.get())->livei
.begin() <= begin
)
533 checkList(DLList
&list
)
538 for (DLList::Iterator iter
= list
.iterator(); !iter
.end(); iter
.next()) {
539 next
= Value::get(iter
);
542 assert(prev
->livei
.begin() <= next
->livei
.begin());
544 assert(next
->join
== next
);
550 RegAlloc::collectLValues(DLList
&list
, bool assignedOnly
)
552 for (int n
= 0; n
< insns
.getSize(); ++n
) {
553 Instruction
*i
= insnBySerial(n
);
555 for (int d
= 0; i
->defExists(d
); ++d
)
556 if (!i
->getDef(d
)->livei
.isEmpty())
557 if (!assignedOnly
|| i
->getDef(d
)->reg
.data
.id
>= 0)
558 insertOrderedTail(list
, i
->getDef(d
));
564 RegAlloc::allocateConstrainedValues()
567 RegisterSet regSet
[4];
570 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "RA: allocating constrained values\n");
572 collectLValues(regVals
, true);
574 for (int c
= 0; c
< 4; ++c
)
575 regSet
[c
].init(prog
->getTarget());
577 for (int n
= 0; n
< insns
.getSize(); ++n
) {
578 Instruction
*i
= insnBySerial(n
);
580 const int vecSize
= i
->defCount(0xf);
583 assert(vecSize
<= 4);
585 for (int c
= 0; c
< vecSize
; ++c
)
586 defs
[c
] = i
->def
[c
].rep();
588 if (defs
[0]->reg
.data
.id
>= 0) {
589 for (int c
= 1; c
< vecSize
; ++c
) {
590 assert(defs
[c
]->reg
.data
.id
>= 0);
595 for (int c
= 0; c
< vecSize
; ++c
) {
599 for (DLList::Iterator it
= regVals
.iterator(); !it
.end(); it
.next()) {
600 Value
*rVal
= Value::get(it
);
601 if (rVal
->reg
.data
.id
>= 0 && rVal
->livei
.overlaps(defs
[c
]->livei
))
602 regSet
[c
].occupy(rVal
);
605 if (vecSize
== 2) // granularity is 2 instead of 4
606 mask
|= 0x11111111 << 2;
607 regSet
[c
].periodicMask(defs
[0]->reg
.file
, 0, ~(mask
<< c
));
609 if (!defs
[c
]->livei
.isEmpty())
610 insertOrderedTail(regVals
, defs
[c
]);
612 for (int c
= 1; c
< vecSize
; ++c
)
613 regSet
[0].intersect(defs
[0]->reg
.file
, &regSet
[c
]);
615 if (!regSet
[0].assign(&defs
[0], vecSize
)) // TODO: spilling
618 for (int c
= 0; c
< 4; c
+= 2)
619 if (regSet
[c
].getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
620 prog
->maxGPR
= regSet
[c
].getMaxAssigned(FILE_GPR
);
625 RegAlloc::linearScan()
628 DLList unhandled
, active
, inactive
;
629 RegisterSet
f(prog
->getTarget()), free(prog
->getTarget());
631 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "RA: linear scan\n");
633 collectLValues(unhandled
, false);
635 for (DLList::Iterator cI
= unhandled
.iterator(); !cI
.end();) {
636 cur
= Value::get(cI
);
639 for (DLList::Iterator aI
= active
.iterator(); !aI
.end();) {
640 val
= Value::get(aI
);
641 if (val
->livei
.end() <= cur
->livei
.begin()) {
645 if (!val
->livei
.contains(cur
->livei
.begin())) {
647 aI
.moveToList(inactive
);
653 for (DLList::Iterator iI
= inactive
.iterator(); !iI
.end();) {
654 val
= Value::get(iI
);
655 if (val
->livei
.end() <= cur
->livei
.begin()) {
658 if (val
->livei
.contains(cur
->livei
.begin())) {
660 iI
.moveToList(active
);
667 for (DLList::Iterator iI
= inactive
.iterator(); !iI
.end(); iI
.next()) {
668 val
= Value::get(iI
);
669 if (val
->livei
.overlaps(cur
->livei
))
673 for (DLList::Iterator uI
= unhandled
.iterator(); !uI
.end(); uI
.next()) {
674 val
= Value::get(uI
);
675 if (val
->reg
.data
.id
>= 0 && val
->livei
.overlaps(cur
->livei
))
679 if (cur
->reg
.data
.id
< 0) {
680 bool spill
= !f
.assign(&cur
, 1);
682 ERROR("out of registers of file %u\n", cur
->reg
.file
);
690 if (f
.getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
691 prog
->maxGPR
= f
.getMaxAssigned(FILE_GPR
);
692 if (free
.getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
693 prog
->maxGPR
= free
.getMaxAssigned(FILE_GPR
);
700 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
701 !fi
.end(); fi
.next()) {
702 func
= reinterpret_cast<Function
*>(fi
.get());
712 InsertConstraintsPass insertConstr
;
713 PhiMovesPass insertMoves
;
714 BuildIntervalsPass buildIntervals
;
719 ret
= insertConstr
.exec(func
);
723 ret
= insertMoves
.run(func
);
727 for (sequence
= func
->cfg
.nextSequence(), i
= 0;
728 ret
&& i
<= func
->loopNestingBound
;
729 sequence
= func
->cfg
.nextSequence(), ++i
)
730 ret
= buildLiveSets(BasicBlock::get(func
->cfg
.getRoot()));
734 func
->orderInstructions(this->insns
);
736 ret
= buildIntervals
.run(func
);
740 ret
= coalesceValues(JOIN_MASK_PHI
);
743 switch (prog
->getTarget()->getChipset() & 0xf0) {
745 ret
= coalesceValues(JOIN_MASK_UNION
| JOIN_MASK_TEX
);
748 ret
= coalesceValues(JOIN_MASK_UNION
| JOIN_MASK_CONSTRAINT
);
755 ret
= coalesceValues(JOIN_MASK_MOV
);
759 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
761 func
->printLiveIntervals();
764 ret
= allocateConstrainedValues() && linearScan();
769 // TODO: should probably call destructor on LValues later instead
770 for (ArrayList::Iterator it
= func
->allLValues
.iterator();
771 !it
.end(); it
.next())
772 reinterpret_cast<LValue
*>(it
.get())->livei
.clear();
777 bool Program::registerAllocation()
784 RegAlloc::InsertConstraintsPass::exec(Function
*ir
)
788 bool ret
= run(ir
, true, true);
790 ret
= insertConstraintMoves();
794 // TODO: make part of texture insn
796 RegAlloc::InsertConstraintsPass::textureMask(TexInstruction
*tex
)
802 for (d
= 0, k
= 0, c
= 0; c
< 4; ++c
) {
803 if (!(tex
->tex
.mask
& (1 << c
)))
805 if (tex
->getDef(k
)->refCount()) {
807 def
[d
++] = tex
->getDef(k
);
811 tex
->tex
.mask
= mask
;
813 #if 0 // reorder or set the unused ones NULL ?
814 for (c
= 0; c
< 4; ++c
)
815 if (!(tex
->tex
.mask
& (1 << c
)))
816 def
[d
++] = tex
->getDef(c
);
818 for (c
= 0; c
< d
; ++c
)
819 tex
->setDef(c
, def
[c
]);
822 tex
->setDef(c
, NULL
);
827 RegAlloc::InsertConstraintsPass::detectConflict(Instruction
*cst
, int s
)
829 // current register allocation can't handle it if a value participates in
830 // multiple constraints
831 for (ValueRef::Iterator it
= cst
->src
[s
].iterator(); !it
.end(); it
.next()) {
832 Instruction
*insn
= it
.get()->getInsn();
837 // can start at s + 1 because detectConflict is called on all sources
838 for (int c
= s
+ 1; cst
->srcExists(c
); ++c
)
839 if (cst
->getSrc(c
) == cst
->getSrc(s
))
842 Instruction
*defi
= cst
->getSrc(s
)->getInsn();
844 return (!defi
|| defi
->constrainedDefs());
848 RegAlloc::InsertConstraintsPass::addConstraint(Instruction
*i
, int s
, int n
)
853 // first, look for an existing identical constraint op
854 for (DLList::Iterator it
= constrList
.iterator(); !it
.end(); it
.next()) {
855 cst
= reinterpret_cast<Instruction
*>(it
.get());
856 if (!i
->bb
->dominatedBy(cst
->bb
))
858 for (d
= 0; d
< n
; ++d
)
859 if (cst
->getSrc(d
) != i
->getSrc(d
+ s
))
862 for (d
= 0; d
< n
; ++d
, ++s
)
863 i
->setSrc(s
, cst
->getDef(d
));
867 cst
= new_Instruction(func
, OP_CONSTRAINT
, i
->dType
);
869 for (d
= 0; d
< n
; ++s
, ++d
) {
870 cst
->setDef(d
, new_LValue(func
, FILE_GPR
));
871 cst
->setSrc(d
, i
->getSrc(s
));
872 i
->setSrc(s
, cst
->getDef(d
));
874 i
->bb
->insertBefore(i
, cst
);
876 constrList
.insert(cst
);
879 // Add a dummy use of the pointer source of >= 8 byte loads after the load
880 // to prevent it from being assigned a register that overlaps the load's
881 // destination, which would produce random corruptions.
883 RegAlloc::InsertConstraintsPass::addHazard(Instruction
*i
, const ValueRef
*src
)
885 Instruction
*hzd
= new_Instruction(func
, OP_NOP
, TYPE_NONE
);
886 hzd
->setSrc(0, src
->get());
887 i
->bb
->insertAfter(i
, hzd
);
891 // Insert constraint markers for instructions whose multiple sources must be
892 // located in consecutive registers.
894 RegAlloc::InsertConstraintsPass::visit(BasicBlock
*bb
)
900 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
903 if ((tex
= i
->asTex())) {
906 // FIXME: this is target specific
907 if (tex
->op
== OP_TXQ
) {
908 s
= tex
->srcCount(0xff);
911 s
= tex
->tex
.target
.getArgCount();
912 if (!tex
->tex
.target
.isArray() &&
913 (tex
->tex
.rIndirectSrc
>= 0 || tex
->tex
.sIndirectSrc
>= 0))
915 n
= tex
->srcCount(0xff) - s
;
920 addConstraint(i
, 0, s
);
922 addConstraint(i
, s
, n
);
924 if (i
->op
== OP_EXPORT
|| i
->op
== OP_STORE
) {
925 for (size
= typeSizeof(i
->dType
), s
= 1; size
> 0; ++s
) {
926 assert(i
->srcExists(s
));
927 size
-= i
->getSrc(s
)->reg
.size
;
930 addConstraint(i
, 1, s
- 1);
932 if (i
->op
== OP_LOAD
) {
933 if (i
->src
[0].isIndirect(0) && typeSizeof(i
->dType
) >= 8)
934 addHazard(i
, i
->src
[0].getIndirect(0));
940 // Insert extra moves so that, if multiple register constraints on a value are
941 // in conflict, these conflicts can be resolved.
943 RegAlloc::InsertConstraintsPass::insertConstraintMoves()
945 for (DLList::Iterator it
= constrList
.iterator(); !it
.end(); it
.next()) {
946 Instruction
*cst
= reinterpret_cast<Instruction
*>(it
.get());
948 for (int s
= 0; cst
->srcExists(s
); ++s
) {
949 if (!detectConflict(cst
, s
))
951 Instruction
*mov
= new_Instruction(func
, OP_MOV
,
952 typeOfSize(cst
->src
[s
].getSize()));
953 mov
->setSrc(0, cst
->getSrc(s
));
954 mov
->setDef(0, new_LValue(func
, FILE_GPR
));
955 cst
->setSrc(s
, mov
->getDef(0));
957 cst
->bb
->insertBefore(cst
, mov
);
963 } // namespace nv50_ir