2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "nv50_ir_target.h"
26 #include "nv50/nv50_debug.h"
30 #define MAX_REGISTER_FILE_SIZE 256
36 RegisterSet(const Target
*);
38 void init(const Target
*);
39 void reset(); // reset allocation status, but not max assigned regs
41 void periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
);
42 void intersect(DataFile f
, const RegisterSet
*);
44 bool assign(Value
**, int nr
);
45 void release(const Value
*);
46 void occupy(const Value
*);
48 int getMaxAssigned(DataFile f
) const { return fill
[f
]; }
53 uint32_t bits
[FILE_ADDRESS
+ 1][(MAX_REGISTER_FILE_SIZE
+ 31) / 32];
55 int unit
[FILE_ADDRESS
+ 1]; // log2 of allocation granularity
57 int last
[FILE_ADDRESS
+ 1];
58 int fill
[FILE_ADDRESS
+ 1];
64 memset(bits
, 0, sizeof(bits
));
// Default constructor. Its body (original lines 68-70) is not visible in
// this mangled listing -- presumably leaves the set unconfigured until
// init()/reset() are called. TODO(review): confirm against the original.
67 RegisterSet::RegisterSet()
73 RegisterSet::init(const Target
*targ
)
75 for (unsigned int rf
= 0; rf
<= FILE_ADDRESS
; ++rf
) {
76 DataFile f
= static_cast<DataFile
>(rf
);
77 last
[rf
] = targ
->getFileSize(f
) - 1;
78 unit
[rf
] = targ
->getFileUnit(f
);
80 assert(last
[rf
] < MAX_REGISTER_FILE_SIZE
);
84 RegisterSet::RegisterSet(const Target
*targ
)
91 RegisterSet::periodicMask(DataFile f
, uint32_t lock
, uint32_t unlock
)
93 for (int i
= 0; i
< (last
[f
] + 31) / 32; ++i
)
94 bits
[f
][i
] = (bits
[f
][i
] | lock
) & ~unlock
;
98 RegisterSet::intersect(DataFile f
, const RegisterSet
*set
)
100 for (int i
= 0; i
< (last
[f
] + 31) / 32; ++i
)
101 bits
[f
][i
] |= set
->bits
[f
][i
];
105 RegisterSet::print() const
108 for (int i
= 0; i
< (last
[FILE_GPR
] + 31) / 32; ++i
)
109 INFO(" %08x", bits
[FILE_GPR
][i
]);
114 RegisterSet::assign(Value
**def
, int nr
)
116 DataFile f
= def
[0]->reg
.file
;
120 int s
= (n
* def
[0]->reg
.size
) >> unit
[f
];
121 uint32_t m
= (1 << s
) - 1;
123 int id
= last
[f
] + 1;
126 for (i
= 0; (i
* 32) < last
[f
]; ++i
) {
127 if (bits
[f
][i
] == 0xffffffff)
130 for (id
= 0; id
< 32; id
+= s
)
131 if (!(bits
[f
][i
] & (m
<< id
)))
140 bits
[f
][id
/ 32] |= m
<< (id
% 32);
142 if (id
+ (s
- 1) > fill
[f
])
143 fill
[f
] = id
+ (s
- 1);
145 for (i
= 0; i
< nr
; ++i
, ++id
)
146 if (!def
[i
]->livei
.isEmpty()) // XXX: really increased id if empty ?
147 def
[i
]->reg
.data
.id
= id
;
152 RegisterSet::occupy(const Value
*val
)
154 int id
= val
->reg
.data
.id
;
157 unsigned int f
= val
->reg
.file
;
159 uint32_t m
= (1 << (val
->reg
.size
>> unit
[f
])) - 1;
161 INFO_DBG(0, REG_ALLOC
, "reg occupy: %u[%i] %x\n", f
, id
, m
);
163 bits
[f
][id
/ 32] |= m
<< (id
% 32);
170 RegisterSet::release(const Value
*val
)
172 int id
= val
->reg
.data
.id
;
175 unsigned int f
= val
->reg
.file
;
177 uint32_t m
= (1 << (val
->reg
.size
>> unit
[f
])) - 1;
179 INFO_DBG(0, REG_ALLOC
, "reg release: %u[%i] %x\n", f
, id
, m
);
181 bits
[f
][id
/ 32] &= ~(m
<< (id
% 32));
184 #define JOIN_MASK_PHI (1 << 0)
185 #define JOIN_MASK_UNION (1 << 1)
186 #define JOIN_MASK_MOV (1 << 2)
187 #define JOIN_MASK_TEX (1 << 3)
188 #define JOIN_MASK_CONSTRAINT (1 << 4)
193 RegAlloc(Program
*program
) : prog(program
), sequence(0) { }
199 bool coalesceValues(unsigned int mask
);
201 bool allocateConstrainedValues();
204 class PhiMovesPass
: public Pass
{
206 virtual bool visit(BasicBlock
*);
207 inline bool needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
);
210 class BuildIntervalsPass
: public Pass
{
212 virtual bool visit(BasicBlock
*);
213 void collectLiveValues(BasicBlock
*);
214 void addLiveRange(Value
*, const BasicBlock
*, int end
);
217 class InsertConstraintsPass
: public Pass
{
219 bool exec(Function
*func
);
221 virtual bool visit(BasicBlock
*);
223 bool insertConstraintMoves();
225 void addHazard(Instruction
*i
, const ValueRef
*src
);
226 void textureMask(TexInstruction
*);
227 void addConstraint(Instruction
*, int s
, int n
);
228 bool detectConflict(Instruction
*, int s
);
233 bool buildLiveSets(BasicBlock
*);
234 void collectLValues(DLList
&, bool assignedOnly
);
236 void insertOrderedTail(DLList
&, Value
*);
237 inline Instruction
*insnBySerial(int);
243 // instructions in control flow / chronological order
246 int sequence
; // for manual passes through CFG
250 RegAlloc::insnBySerial(int serial
)
252 return reinterpret_cast<Instruction
*>(insns
.get(serial
));
256 RegAlloc::BuildIntervalsPass::addLiveRange(Value
*val
,
257 const BasicBlock
*bb
,
260 Instruction
*insn
= val
->getUniqueInsn();
264 assert(bb
->getFirst()->serial
<= bb
->getExit()->serial
);
265 assert(bb
->getExit()->serial
+ 1 >= end
);
267 int begin
= insn
->serial
;
268 if (begin
< bb
->getEntry()->serial
|| begin
> bb
->getExit()->serial
)
269 begin
= bb
->getEntry()->serial
;
271 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "%%%i <- live range [%i(%i), %i)\n",
272 val
->id
, begin
, insn
->serial
, end
);
274 if (begin
!= end
) // empty ranges are only added as hazards for fixed regs
275 val
->livei
.extend(begin
, end
);
279 RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock
*b
, BasicBlock
*p
)
281 if (b
->cfg
.incidentCount() <= 1)
285 for (Graph::EdgeIterator ei
= p
->cfg
.outgoing(); !ei
.end(); ei
.next())
286 if (ei
.getType() == Graph::Edge::TREE
||
287 ei
.getType() == Graph::Edge::FORWARD
)
292 // For each operand of each PHI in b, generate a new value by inserting a MOV
293 // at the end of the block it is coming from and replace the operand with its
294 // result. This eliminates liveness conflicts and enables us to let values be
295 // copied to the right register if such a conflict exists nonetheless.
297 // These MOVs are also crucial in making sure the live intervals of phi srces
298 // are extended until the end of the loop, since they are not included in the
301 RegAlloc::PhiMovesPass::visit(BasicBlock
*bb
)
303 Instruction
*phi
, *mov
;
306 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
307 pb
= pn
= BasicBlock::get(ei
.getNode());
310 if (needNewElseBlock(bb
, pb
)) {
311 pn
= new BasicBlock(func
);
313 // deletes an edge, iterator is invalid after this:
314 pb
->cfg
.detach(&bb
->cfg
);
315 pb
->cfg
.attach(&pn
->cfg
, Graph::Edge::TREE
);
316 pn
->cfg
.attach(&bb
->cfg
, Graph::Edge::FORWARD
); // XXX: check order !
318 assert(pb
->getExit()->op
!= OP_CALL
);
319 if (pb
->getExit()->asFlow()->target
.bb
== bb
)
320 pb
->getExit()->asFlow()->target
.bb
= pn
;
325 // insert MOVs (phi->src(j) should stem from j-th in-BB)
327 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
328 pb
= BasicBlock::get(ei
.getNode());
329 if (!pb
->isTerminated())
330 pb
->insertTail(new_FlowInstruction(func
, OP_BRA
, bb
));
332 for (phi
= bb
->getPhi(); phi
&& phi
->op
== OP_PHI
; phi
= phi
->next
) {
333 mov
= new_Instruction(func
, OP_MOV
, TYPE_U32
);
335 mov
->setSrc(0, phi
->getSrc(j
));
336 mov
->setDef(0, new_LValue(func
, phi
->getDef(0)->asLValue()));
337 phi
->setSrc(j
, mov
->getDef(0));
339 pb
->insertBefore(pb
->getExit(), mov
);
347 // Build the set of live-in variables of bb.
349 RegAlloc::buildLiveSets(BasicBlock
*bb
)
355 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "buildLiveSets(BB:%i)\n", bb
->getId());
357 bb
->liveSet
.allocate(func
->allLValues
.getSize(), false);
360 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
361 bn
= BasicBlock::get(ei
.getNode());
364 if (bn
->cfg
.visit(sequence
))
365 if (!buildLiveSets(bn
))
368 bb
->liveSet
= bn
->liveSet
;
370 bb
->liveSet
|= bn
->liveSet
;
372 if (!n
&& !bb
->liveSet
.marker
)
374 bb
->liveSet
.marker
= true;
376 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
377 INFO("BB:%i live set of out blocks:\n", bb
->getId());
381 // if (!bb->getEntry())
384 for (i
= bb
->getExit(); i
&& i
!= bb
->getEntry()->prev
; i
= i
->prev
) {
385 for (d
= 0; i
->defExists(d
); ++d
)
386 bb
->liveSet
.clr(i
->getDef(d
)->id
);
387 for (s
= 0; i
->srcExists(s
); ++s
)
388 if (i
->getSrc(s
)->asLValue())
389 bb
->liveSet
.set(i
->getSrc(s
)->id
);
391 for (i
= bb
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
)
392 bb
->liveSet
.clr(i
->getDef(0)->id
);
394 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
395 INFO("BB:%i live set after propagation:\n", bb
->getId());
403 RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock
*bb
)
405 BasicBlock
*bbA
= NULL
, *bbB
= NULL
;
407 assert(bb
->cfg
.incidentCount() || bb
->liveSet
.popCount() == 0);
409 if (bb
->cfg
.outgoingCount()) {
410 // trickery to save a loop of OR'ing liveSets
411 // aliasing works fine with BitSet::setOr
412 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
413 if (ei
.getType() == Graph::Edge::DUMMY
)
416 bb
->liveSet
.setOr(&bbA
->liveSet
, &bbB
->liveSet
);
421 bbB
= BasicBlock::get(ei
.getNode());
423 bb
->liveSet
.setOr(&bbB
->liveSet
, bbA
? &bbA
->liveSet
: NULL
);
425 if (bb
->cfg
.incidentCount()) {
431 RegAlloc::BuildIntervalsPass::visit(BasicBlock
*bb
)
433 collectLiveValues(bb
);
435 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "BuildIntervals(BB:%i)\n", bb
->getId());
437 // go through out blocks and delete phi sources that do not originate from
438 // the current block from the live set
439 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
440 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
442 for (Instruction
*i
= out
->getPhi(); i
&& i
->op
== OP_PHI
; i
= i
->next
) {
443 bb
->liveSet
.clr(i
->getDef(0)->id
);
445 for (int s
= 0; i
->srcExists(s
); ++s
) {
446 assert(i
->src(s
).getInsn());
447 if (i
->getSrc(s
)->getUniqueInsn()->bb
== bb
) // XXX: reachableBy ?
448 bb
->liveSet
.set(i
->getSrc(s
)->id
);
450 bb
->liveSet
.clr(i
->getSrc(s
)->id
);
455 // remaining live-outs are live until end
457 for (unsigned int j
= 0; j
< bb
->liveSet
.getSize(); ++j
)
458 if (bb
->liveSet
.test(j
))
459 addLiveRange(func
->getLValue(j
), bb
, bb
->getExit()->serial
+ 1);
462 for (Instruction
*i
= bb
->getExit(); i
&& i
->op
!= OP_PHI
; i
= i
->prev
) {
463 for (int d
= 0; i
->defExists(d
); ++d
) {
464 bb
->liveSet
.clr(i
->getDef(d
)->id
);
465 if (i
->getDef(d
)->reg
.data
.id
>= 0) // add hazard for fixed regs
466 i
->getDef(d
)->livei
.extend(i
->serial
, i
->serial
);
469 for (int s
= 0; i
->srcExists(s
); ++s
) {
470 if (!i
->getSrc(s
)->asLValue())
472 if (!bb
->liveSet
.test(i
->getSrc(s
)->id
)) {
473 bb
->liveSet
.set(i
->getSrc(s
)->id
);
474 addLiveRange(i
->getSrc(s
), bb
, i
->serial
);
483 RegAlloc::coalesceValues(unsigned int mask
)
487 for (n
= 0; n
< insns
.getSize(); ++n
) {
489 Instruction
*insn
= insnBySerial(n
);
493 if (!(mask
& JOIN_MASK_PHI
))
495 for (c
= 0; insn
->srcExists(c
); ++c
)
496 if (!insn
->getDef(0)->coalesce(insn
->getSrc(c
), false)) {
497 ERROR("failed to coalesce phi operands\n");
502 if (!(mask
& JOIN_MASK_UNION
))
504 for (c
= 0; insn
->srcExists(c
); ++c
)
505 insn
->getDef(0)->coalesce(insn
->getSrc(c
), true);
508 if (!(mask
& JOIN_MASK_CONSTRAINT
))
510 for (c
= 0; c
< 4 && insn
->srcExists(c
); ++c
)
511 insn
->getDef(c
)->coalesce(insn
->getSrc(c
), true);
514 if (!(mask
& JOIN_MASK_MOV
))
517 if (!insn
->getDef(0)->uses
.empty())
518 i
= insn
->getDef(0)->uses
.front()->getInsn();
519 // if this is a contraint-move there will only be a single use
520 if (i
&& i
->op
== OP_CONSTRAINT
)
522 i
= insn
->getSrc(0)->getUniqueInsn();
523 if (i
&& !i
->constrainedDefs())
524 insn
->getDef(0)->coalesce(insn
->getSrc(0), false);
534 if (!(mask
& JOIN_MASK_TEX
))
536 for (c
= 0; c
< 4 && insn
->srcExists(c
); ++c
)
537 insn
->getDef(c
)->coalesce(insn
->getSrc(c
), true);
547 RegAlloc::insertOrderedTail(DLList
&list
, Value
*val
)
549 // we insert the live intervals in order, so this should be short
550 DLList::Iterator iter
= list
.revIterator();
551 const int begin
= val
->livei
.begin();
552 for (; !iter
.end(); iter
.next()) {
553 if (reinterpret_cast<Value
*>(iter
.get())->livei
.begin() <= begin
)
560 checkList(DLList
&list
)
565 for (DLList::Iterator iter
= list
.iterator(); !iter
.end(); iter
.next()) {
566 next
= Value::get(iter
);
569 assert(prev
->livei
.begin() <= next
->livei
.begin());
571 assert(next
->join
== next
);
577 RegAlloc::collectLValues(DLList
&list
, bool assignedOnly
)
579 for (int n
= 0; n
< insns
.getSize(); ++n
) {
580 Instruction
*i
= insnBySerial(n
);
582 for (int d
= 0; i
->defExists(d
); ++d
)
583 if (!i
->getDef(d
)->livei
.isEmpty())
584 if (!assignedOnly
|| i
->getDef(d
)->reg
.data
.id
>= 0)
585 insertOrderedTail(list
, i
->getDef(d
));
591 RegAlloc::allocateConstrainedValues()
594 RegisterSet regSet
[4];
597 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "RA: allocating constrained values\n");
599 collectLValues(regVals
, true);
601 for (int c
= 0; c
< 4; ++c
)
602 regSet
[c
].init(prog
->getTarget());
604 for (int n
= 0; n
< insns
.getSize(); ++n
) {
605 Instruction
*i
= insnBySerial(n
);
607 const int vecSize
= i
->defCount(0xf);
610 assert(vecSize
<= 4);
612 for (int c
= 0; c
< vecSize
; ++c
)
613 defs
[c
] = i
->def(c
).rep();
615 if (defs
[0]->reg
.data
.id
>= 0) {
616 for (int c
= 1; c
< vecSize
; ++c
) {
617 assert(defs
[c
]->reg
.data
.id
>= 0);
622 for (int c
= 0; c
< vecSize
; ++c
) {
626 for (DLList::Iterator it
= regVals
.iterator(); !it
.end(); it
.next()) {
627 Value
*rVal
= Value::get(it
);
628 if (rVal
->reg
.data
.id
>= 0 && rVal
->livei
.overlaps(defs
[c
]->livei
))
629 regSet
[c
].occupy(rVal
);
632 if (vecSize
== 2) // granularity is 2 instead of 4
633 mask
|= 0x11111111 << 2;
634 regSet
[c
].periodicMask(defs
[0]->reg
.file
, 0, ~(mask
<< c
));
636 if (!defs
[c
]->livei
.isEmpty())
637 insertOrderedTail(regVals
, defs
[c
]);
639 for (int c
= 1; c
< vecSize
; ++c
)
640 regSet
[0].intersect(defs
[0]->reg
.file
, ®Set
[c
]);
642 if (!regSet
[0].assign(&defs
[0], vecSize
)) // TODO: spilling
645 for (int c
= 0; c
< 4; c
+= 2)
646 if (regSet
[c
].getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
647 prog
->maxGPR
= regSet
[c
].getMaxAssigned(FILE_GPR
);
652 RegAlloc::linearScan()
655 DLList unhandled
, active
, inactive
;
656 RegisterSet
f(prog
->getTarget()), free(prog
->getTarget());
658 INFO_DBG(prog
->dbgFlags
, REG_ALLOC
, "RA: linear scan\n");
660 collectLValues(unhandled
, false);
662 for (DLList::Iterator cI
= unhandled
.iterator(); !cI
.end();) {
663 cur
= Value::get(cI
);
666 for (DLList::Iterator aI
= active
.iterator(); !aI
.end();) {
667 val
= Value::get(aI
);
668 if (val
->livei
.end() <= cur
->livei
.begin()) {
672 if (!val
->livei
.contains(cur
->livei
.begin())) {
674 aI
.moveToList(inactive
);
680 for (DLList::Iterator iI
= inactive
.iterator(); !iI
.end();) {
681 val
= Value::get(iI
);
682 if (val
->livei
.end() <= cur
->livei
.begin()) {
685 if (val
->livei
.contains(cur
->livei
.begin())) {
687 iI
.moveToList(active
);
694 for (DLList::Iterator iI
= inactive
.iterator(); !iI
.end(); iI
.next()) {
695 val
= Value::get(iI
);
696 if (val
->livei
.overlaps(cur
->livei
))
700 for (DLList::Iterator uI
= unhandled
.iterator(); !uI
.end(); uI
.next()) {
701 val
= Value::get(uI
);
702 if (val
->reg
.data
.id
>= 0 && val
->livei
.overlaps(cur
->livei
))
706 if (cur
->reg
.data
.id
< 0) {
707 bool spill
= !f
.assign(&cur
, 1);
709 ERROR("out of registers of file %u\n", cur
->reg
.file
);
717 if (f
.getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
718 prog
->maxGPR
= f
.getMaxAssigned(FILE_GPR
);
719 if (free
.getMaxAssigned(FILE_GPR
) > prog
->maxGPR
)
720 prog
->maxGPR
= free
.getMaxAssigned(FILE_GPR
);
// RegAlloc::exec()/execFunc() fragments (original lines 727-799): the
// enclosing function signatures and most of the surrounding control flow
// are on lines missing from this mangled listing, so the text is left
// untouched and only annotated.
// Iterate over all functions of the program, setting the 'func' member
// that the per-function passes below operate on.
727 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
728 !fi
.end(); fi
.next()) {
729 func
= reinterpret_cast<Function
*>(fi
.get());
// Per-function pass objects.
739 InsertConstraintsPass insertConstr
;
740 PhiMovesPass insertMoves
;
741 BuildIntervalsPass buildIntervals
;
// Insert register-usage constraints, then PHI-operand moves.
746 ret
= insertConstr
.exec(func
);
750 ret
= insertMoves
.run(func
);
// Propagate live sets from the CFG root; repeated once per loop nesting
// level so values live across back edges reach a fixed point.
754 for (sequence
= func
->cfg
.nextSequence(), i
= 0;
755 ret
&& i
<= func
->loopNestingBound
;
756 sequence
= func
->cfg
.nextSequence(), ++i
)
757 ret
= buildLiveSets(BasicBlock::get(func
->cfg
.getRoot()));
// Serialize instructions, then derive live intervals from the live sets.
761 func
->orderInstructions(this->insns
);
763 ret
= buildIntervals
.run(func
);
// Coalesce phi operands first, then chipset-specific joins.
767 ret
= coalesceValues(JOIN_MASK_PHI
);
770 switch (prog
->getTarget()->getChipset() & 0xf0) {
772 ret
= coalesceValues(JOIN_MASK_UNION
| JOIN_MASK_TEX
);
775 ret
= coalesceValues(JOIN_MASK_UNION
| JOIN_MASK_CONSTRAINT
);
782 ret
= coalesceValues(JOIN_MASK_MOV
);
// Optional debug dump, then the actual allocation: constrained (vector)
// values first, remaining values by linear scan.
786 if (prog
->dbgFlags
& NV50_IR_DEBUG_REG_ALLOC
) {
788 func
->printLiveIntervals();
791 ret
= allocateConstrainedValues() && linearScan();
// Cleanup: clear the live intervals of all LValues of the function.
796 // TODO: should probably call destructor on LValues later instead
797 for (ArrayList::Iterator it
= func
->allLValues
.iterator();
798 !it
.end(); it
.next())
799 reinterpret_cast<LValue
*>(it
.get())->livei
.clear();
// Public entry point for register allocation. Its body (original lines
// 805-808) is not visible in this mangled listing -- presumably it
// constructs a RegAlloc for this program and runs it. TODO(review): confirm.
804 bool Program::registerAllocation()
811 RegAlloc::InsertConstraintsPass::exec(Function
*ir
)
815 bool ret
= run(ir
, true, true);
817 ret
= insertConstraintMoves();
// Compact a texture instruction's destination values according to its
// write mask, dropping unreferenced components. Several lines (locals,
// mask accumulation, the #endif placements) are missing from this
// mangled listing, so the text is left untouched and only annotated.
821 // TODO: make part of texture insn
823 RegAlloc::InsertConstraintsPass::textureMask(TexInstruction
*tex
)
// Walk the 4 components: k indexes existing defs, d collects kept ones.
829 for (d
= 0, k
= 0, c
= 0; c
< 4; ++c
) {
830 if (!(tex
->tex
.mask
& (1 << c
)))
// Keep only components whose def is actually referenced.
832 if (tex
->getDef(k
)->refCount()) {
834 def
[d
++] = tex
->getDef(k
);
// Store the reduced write mask back into the instruction.
838 tex
->tex
.mask
= mask
;
840 #if 0 // reorder or set the unused ones NULL ?
841 for (c
= 0; c
< 4; ++c
)
842 if (!(tex
->tex
.mask
& (1 << c
)))
843 def
[d
++] = tex
->getDef(c
);
// Re-attach the surviving defs in compacted order.
845 for (c
= 0; c
< d
; ++c
)
846 tex
->setDef(c
, def
[c
]);
// Null out the remaining (dropped) def slots.
849 tex
->setDef(c
, NULL
);
854 RegAlloc::InsertConstraintsPass::detectConflict(Instruction
*cst
, int s
)
856 Value
*v
= cst
->getSrc(s
);
858 // current register allocation can't handle it if a value participates in
859 // multiple constraints
860 for (Value::UseIterator it
= v
->uses
.begin(); it
!= v
->uses
.end(); ++it
) {
861 if (cst
!= (*it
)->getInsn())
865 // can start at s + 1 because detectConflict is called on all sources
866 for (int c
= s
+ 1; cst
->srcExists(c
); ++c
)
867 if (v
== cst
->getSrc(c
))
870 Instruction
*defi
= v
->getInsn();
872 return (!defi
|| defi
->constrainedDefs());
876 RegAlloc::InsertConstraintsPass::addConstraint(Instruction
*i
, int s
, int n
)
881 // first, look for an existing identical constraint op
882 for (DLList::Iterator it
= constrList
.iterator(); !it
.end(); it
.next()) {
883 cst
= reinterpret_cast<Instruction
*>(it
.get());
884 if (!i
->bb
->dominatedBy(cst
->bb
))
886 for (d
= 0; d
< n
; ++d
)
887 if (cst
->getSrc(d
) != i
->getSrc(d
+ s
))
890 for (d
= 0; d
< n
; ++d
, ++s
)
891 i
->setSrc(s
, cst
->getDef(d
));
895 cst
= new_Instruction(func
, OP_CONSTRAINT
, i
->dType
);
897 for (d
= 0; d
< n
; ++s
, ++d
) {
898 cst
->setDef(d
, new_LValue(func
, FILE_GPR
));
899 cst
->setSrc(d
, i
->getSrc(s
));
900 i
->setSrc(s
, cst
->getDef(d
));
902 i
->bb
->insertBefore(i
, cst
);
904 constrList
.insert(cst
);
907 // Add a dummy use of the pointer source of >= 8 byte loads after the load
908 // to prevent it from being assigned a register which overlapping the load's
909 // destination, which would produce random corruptions.
911 RegAlloc::InsertConstraintsPass::addHazard(Instruction
*i
, const ValueRef
*src
)
913 Instruction
*hzd
= new_Instruction(func
, OP_NOP
, TYPE_NONE
);
914 hzd
->setSrc(0, src
->get());
915 i
->bb
->insertAfter(i
, hzd
);
919 // Insert constraint markers for instructions whose multiple sources must be
920 // located in consecutive registers.
922 RegAlloc::InsertConstraintsPass::visit(BasicBlock
*bb
)
928 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
931 if ((tex
= i
->asTex())) {
934 // FIXME: this is target specific
935 if (tex
->op
== OP_TXQ
) {
936 s
= tex
->srcCount(0xff);
939 s
= tex
->tex
.target
.getArgCount();
940 if (!tex
->tex
.target
.isArray() &&
941 (tex
->tex
.rIndirectSrc
>= 0 || tex
->tex
.sIndirectSrc
>= 0))
943 if (tex
->op
== OP_TXD
&& tex
->tex
.useOffsets
)
945 n
= tex
->srcCount(0xff) - s
;
950 addConstraint(i
, 0, s
);
952 addConstraint(i
, s
, n
);
954 if (i
->op
== OP_EXPORT
|| i
->op
== OP_STORE
) {
955 for (size
= typeSizeof(i
->dType
), s
= 1; size
> 0; ++s
) {
956 assert(i
->srcExists(s
));
957 size
-= i
->getSrc(s
)->reg
.size
;
960 addConstraint(i
, 1, s
- 1);
962 if (i
->op
== OP_LOAD
) {
963 if (i
->src(0).isIndirect(0) && typeSizeof(i
->dType
) >= 8)
964 addHazard(i
, i
->src(0).getIndirect(0));
970 // Insert extra moves so that, if multiple register constraints on a value are
971 // in conflict, these conflicts can be resolved.
973 RegAlloc::InsertConstraintsPass::insertConstraintMoves()
975 for (DLList::Iterator it
= constrList
.iterator(); !it
.end(); it
.next()) {
976 Instruction
*cst
= reinterpret_cast<Instruction
*>(it
.get());
978 for (int s
= 0; cst
->srcExists(s
); ++s
) {
979 if (!detectConflict(cst
, s
))
981 Instruction
*mov
= new_Instruction(func
, OP_MOV
,
982 typeOfSize(cst
->src(s
).getSize()));
983 mov
->setSrc(0, cst
->getSrc(s
));
984 mov
->setDef(0, new_LValue(func
, FILE_GPR
));
985 cst
->setSrc(s
, mov
->getDef(0));
987 cst
->bb
->insertBefore(cst
, mov
);
993 } // namespace nv50_ir