2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 #include "nv50/codegen/nv50_ir.h"
24 #include "nv50/codegen/nv50_ir_build_util.h"
26 #include "nv50_ir_target_nvc0.h"
38 #define QUADOP(q, r, s, t) \
39 ((QOP_##q << 6) | (QOP_##r << 4) | \
40 (QOP_##s << 2) | (QOP_##t << 0))
42 class NVC0LegalizeSSA
: public Pass
45 virtual bool visit(BasicBlock
*);
46 virtual bool visit(Function
*);
48 // we want to insert calls to the builtin library only after optimization
49 void handleDIV(Instruction
*); // integer division, modulus
50 void handleRCPRSQ(Instruction
*); // double precision float recip/rsqrt
57 NVC0LegalizeSSA::handleDIV(Instruction
*i
)
59 FlowInstruction
*call
;
63 bld
.setPosition(i
, false);
64 def
[0] = bld
.mkMovToReg(0, i
->getSrc(0))->getDef(0);
65 def
[1] = bld
.mkMovToReg(1, i
->getSrc(1))->getDef(0);
67 case TYPE_U32
: builtin
= NVC0_BUILTIN_DIV_U32
; break;
68 case TYPE_S32
: builtin
= NVC0_BUILTIN_DIV_S32
; break;
72 call
= bld
.mkFlow(OP_CALL
, NULL
, CC_ALWAYS
, NULL
);
73 bld
.mkMov(i
->getDef(0), def
[(i
->op
== OP_DIV
) ? 0 : 1]);
74 bld
.mkClobber(FILE_GPR
, (i
->op
== OP_DIV
) ? 0xe : 0xd, 2);
75 bld
.mkClobber(FILE_PREDICATE
, (i
->dType
== TYPE_S32
) ? 0xf : 0x3, 0);
78 call
->absolute
= call
->builtin
= 1;
79 call
->target
.builtin
= builtin
;
80 delete_Instruction(prog
, i
);
84 NVC0LegalizeSSA::handleRCPRSQ(Instruction
*i
)
90 NVC0LegalizeSSA::visit(Function
*fn
)
92 bld
.setProgram(fn
->getProgram());
97 NVC0LegalizeSSA::visit(BasicBlock
*bb
)
100 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
102 if (i
->dType
== TYPE_F32
)
111 if (i
->dType
== TYPE_F64
)
121 class NVC0LegalizePostRA
: public Pass
124 NVC0LegalizePostRA(const Program
*);
127 virtual bool visit(Function
*);
128 virtual bool visit(BasicBlock
*);
130 void replaceZero(Instruction
*);
131 void split64BitOp(Instruction
*);
132 bool tryReplaceContWithBra(BasicBlock
*);
133 void propagateJoin(BasicBlock
*);
137 TexUse(Instruction
*use
, const Instruction
*tex
)
138 : insn(use
), tex(tex
), level(-1) { }
140 const Instruction
*tex
; // or split / mov
146 Limits(int min
, int max
) : min(min
), max(max
) { }
149 bool insertTextureBarriers(Function
*);
150 inline bool insnDominatedBy(const Instruction
*, const Instruction
*) const;
151 void findFirstUses(const Instruction
*tex
, const Instruction
*def
,
153 void findOverwritingDefs(const Instruction
*tex
, Instruction
*insn
,
154 const BasicBlock
*term
,
156 void addTexUse(std::list
<TexUse
>&, Instruction
*, const Instruction
*);
157 const Instruction
*recurseDef(const Instruction
*);
161 const bool needTexBar
;
164 NVC0LegalizePostRA::NVC0LegalizePostRA(const Program
*prog
)
165 : needTexBar(prog
->getTarget()->getChipset() >= 0xe0)
170 NVC0LegalizePostRA::insnDominatedBy(const Instruction
*later
,
171 const Instruction
*early
) const
173 if (early
->bb
== later
->bb
)
174 return early
->serial
< later
->serial
;
175 return later
->bb
->dominatedBy(early
->bb
);
179 NVC0LegalizePostRA::addTexUse(std::list
<TexUse
> &uses
,
180 Instruction
*usei
, const Instruction
*insn
)
183 for (std::list
<TexUse
>::iterator it
= uses
.begin();
185 if (insnDominatedBy(usei
, it
->insn
)) {
189 if (insnDominatedBy(it
->insn
, usei
))
195 uses
.push_back(TexUse(usei
, insn
));
199 NVC0LegalizePostRA::findOverwritingDefs(const Instruction
*texi
,
201 const BasicBlock
*term
,
202 std::list
<TexUse
> &uses
)
204 while (insn
->op
== OP_MOV
&& insn
->getDef(0)->equals(insn
->getSrc(0)))
205 insn
= insn
->getSrc(0)->getUniqueInsn();
207 if (!insn
|| !insn
->bb
->reachableBy(texi
->bb
, term
))
211 /* Values not connected to the tex's definition through any of these should
212 * not be conflicting.
219 for (int s
= 0; insn
->srcExists(s
); ++s
)
220 findOverwritingDefs(texi
, insn
->getSrc(s
)->getUniqueInsn(), term
,
224 // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ?
225 addTexUse(uses
, insn
, texi
);
231 NVC0LegalizePostRA::findFirstUses(const Instruction
*texi
,
232 const Instruction
*insn
,
233 std::list
<TexUse
> &uses
)
235 for (int d
= 0; insn
->defExists(d
); ++d
) {
236 Value
*v
= insn
->getDef(d
);
237 for (Value::UseIterator u
= v
->uses
.begin(); u
!= v
->uses
.end(); ++u
) {
238 Instruction
*usei
= (*u
)->getInsn();
240 if (usei
->op
== OP_PHI
|| usei
->op
== OP_UNION
) {
241 // need a barrier before WAW cases
242 for (int s
= 0; usei
->srcExists(s
); ++s
) {
243 Instruction
*defi
= usei
->getSrc(s
)->getUniqueInsn();
244 if (defi
&& &usei
->src(s
) != *u
)
245 findOverwritingDefs(texi
, defi
, usei
->bb
, uses
);
249 if (usei
->op
== OP_SPLIT
||
250 usei
->op
== OP_MERGE
||
251 usei
->op
== OP_PHI
||
252 usei
->op
== OP_UNION
) {
253 // these uses don't manifest in the machine code
254 findFirstUses(texi
, usei
, uses
);
256 if (usei
->op
== OP_MOV
&& usei
->getDef(0)->equals(usei
->getSrc(0)) &&
257 usei
->subOp
!= NV50_IR_SUBOP_MOV_FINAL
) {
258 findFirstUses(texi
, usei
, uses
);
260 addTexUse(uses
, usei
, insn
);
267 // This pass is a bit long and ugly and can probably be optimized.
269 // 1. obtain a list of TEXes and their outputs' first use(s)
270 // 2. calculate the barrier level of each first use (minimal number of TEXes,
271 // over all paths, between the TEX and the use in question)
272 // 3. for each barrier, if all paths from the source TEX to that barrier
273 // contain a barrier of lesser level, it can be culled
275 NVC0LegalizePostRA::insertTextureBarriers(Function
*fn
)
277 std::list
<TexUse
> *uses
;
278 std::vector
<Instruction
*> texes
;
279 std::vector
<int> bbFirstTex
;
280 std::vector
<int> bbFirstUse
;
281 std::vector
<int> texCounts
;
282 std::vector
<TexUse
> useVec
;
285 fn
->orderInstructions(insns
);
287 texCounts
.resize(fn
->allBBlocks
.getSize(), 0);
288 bbFirstTex
.resize(fn
->allBBlocks
.getSize(), insns
.getSize());
289 bbFirstUse
.resize(fn
->allBBlocks
.getSize(), insns
.getSize());
291 // tag BB CFG nodes by their id for later
292 for (ArrayList::Iterator i
= fn
->allBBlocks
.iterator(); !i
.end(); i
.next()) {
293 BasicBlock
*bb
= reinterpret_cast<BasicBlock
*>(i
.get());
295 bb
->cfg
.tag
= bb
->getId();
298 // gather the first uses for each TEX
299 for (int i
= 0; i
< insns
.getSize(); ++i
) {
300 Instruction
*tex
= reinterpret_cast<Instruction
*>(insns
.get(i
));
301 if (isTextureOp(tex
->op
)) {
302 texes
.push_back(tex
);
303 if (!texCounts
.at(tex
->bb
->getId()))
304 bbFirstTex
[tex
->bb
->getId()] = texes
.size() - 1;
305 texCounts
[tex
->bb
->getId()]++;
311 uses
= new std::list
<TexUse
>[texes
.size()];
314 for (size_t i
= 0; i
< texes
.size(); ++i
)
315 findFirstUses(texes
[i
], texes
[i
], uses
[i
]);
317 // determine the barrier level at each use
318 for (size_t i
= 0; i
< texes
.size(); ++i
) {
319 for (std::list
<TexUse
>::iterator u
= uses
[i
].begin(); u
!= uses
[i
].end();
321 BasicBlock
*tb
= texes
[i
]->bb
;
322 BasicBlock
*ub
= u
->insn
->bb
;
325 for (size_t j
= i
+ 1; j
< texes
.size() &&
326 texes
[j
]->bb
== tb
&& texes
[j
]->serial
< u
->insn
->serial
;
330 u
->level
= fn
->cfg
.findLightestPathWeight(&tb
->cfg
,
331 &ub
->cfg
, texCounts
);
333 WARN("Failed to find path TEX -> TEXBAR\n");
337 // this counted all TEXes in the origin block, correct that
338 u
->level
-= i
- bbFirstTex
.at(tb
->getId()) + 1 /* this TEX */;
339 // and did not count the TEXes in the destination block, add those
340 for (size_t j
= bbFirstTex
.at(ub
->getId()); j
< texes
.size() &&
341 texes
[j
]->bb
== ub
&& texes
[j
]->serial
< u
->insn
->serial
;
345 assert(u
->level
>= 0);
346 useVec
.push_back(*u
);
352 // insert the barriers
353 for (size_t i
= 0; i
< useVec
.size(); ++i
) {
354 Instruction
*prev
= useVec
[i
].insn
->prev
;
355 if (useVec
[i
].level
< 0)
357 if (prev
&& prev
->op
== OP_TEXBAR
) {
358 if (prev
->subOp
> useVec
[i
].level
)
359 prev
->subOp
= useVec
[i
].level
;
360 prev
->setSrc(prev
->srcCount(), useVec
[i
].tex
->getDef(0));
362 Instruction
*bar
= new_Instruction(func
, OP_TEXBAR
, TYPE_NONE
);
364 bar
->subOp
= useVec
[i
].level
;
365 // make use explicit to ease latency calculation
366 bar
->setSrc(bar
->srcCount(), useVec
[i
].tex
->getDef(0));
367 useVec
[i
].insn
->bb
->insertBefore(useVec
[i
].insn
, bar
);
371 if (fn
->getProgram()->optLevel
< 3) {
377 std::vector
<Limits
> limitT
, limitB
, limitS
; // entry, exit, single
379 limitT
.resize(fn
->allBBlocks
.getSize(), Limits(0, 0));
380 limitB
.resize(fn
->allBBlocks
.getSize(), Limits(0, 0));
381 limitS
.resize(fn
->allBBlocks
.getSize());
383 // cull unneeded barriers (should do that earlier, but for simplicity)
384 IteratorRef bi
= fn
->cfg
.iteratorCFG();
385 // first calculate min/max outstanding TEXes for each BB
386 for (bi
->reset(); !bi
->end(); bi
->next()) {
387 Graph::Node
*n
= reinterpret_cast<Graph::Node
*>(bi
->get());
388 BasicBlock
*bb
= BasicBlock::get(n
);
390 int max
= std::numeric_limits
<int>::max();
391 for (Instruction
*i
= bb
->getFirst(); i
; i
= i
->next
) {
392 if (isTextureOp(i
->op
)) {
394 if (max
< std::numeric_limits
<int>::max())
397 if (i
->op
== OP_TEXBAR
) {
398 min
= MIN2(min
, i
->subOp
);
399 max
= MIN2(max
, i
->subOp
);
402 // limits when looking at an isolated block
403 limitS
[bb
->getId()].min
= min
;
404 limitS
[bb
->getId()].max
= max
;
406 // propagate the min/max values
407 for (unsigned int l
= 0; l
<= fn
->loopNestingBound
; ++l
) {
408 for (bi
->reset(); !bi
->end(); bi
->next()) {
409 Graph::Node
*n
= reinterpret_cast<Graph::Node
*>(bi
->get());
410 BasicBlock
*bb
= BasicBlock::get(n
);
411 const int bbId
= bb
->getId();
412 for (Graph::EdgeIterator ei
= n
->incident(); !ei
.end(); ei
.next()) {
413 BasicBlock
*in
= BasicBlock::get(ei
.getNode());
414 const int inId
= in
->getId();
415 limitT
[bbId
].min
= MAX2(limitT
[bbId
].min
, limitB
[inId
].min
);
416 limitT
[bbId
].max
= MAX2(limitT
[bbId
].max
, limitB
[inId
].max
);
418 // I just hope this is correct ...
419 if (limitS
[bbId
].max
== std::numeric_limits
<int>::max()) {
421 limitB
[bbId
].min
= limitT
[bbId
].min
+ limitS
[bbId
].min
;
422 limitB
[bbId
].max
= limitT
[bbId
].max
+ limitS
[bbId
].min
;
424 // block contained a barrier
425 limitB
[bbId
].min
= MIN2(limitS
[bbId
].max
,
426 limitT
[bbId
].min
+ limitS
[bbId
].min
);
427 limitB
[bbId
].max
= MIN2(limitS
[bbId
].max
,
428 limitT
[bbId
].max
+ limitS
[bbId
].min
);
432 // finally delete unnecessary barriers
433 for (bi
->reset(); !bi
->end(); bi
->next()) {
434 Graph::Node
*n
= reinterpret_cast<Graph::Node
*>(bi
->get());
435 BasicBlock
*bb
= BasicBlock::get(n
);
436 Instruction
*prev
= NULL
;
438 int max
= limitT
[bb
->getId()].max
;
439 for (Instruction
*i
= bb
->getFirst(); i
; i
= next
) {
441 if (i
->op
== OP_TEXBAR
) {
442 if (i
->subOp
>= max
) {
443 delete_Instruction(prog
, i
);
446 if (prev
&& prev
->op
== OP_TEXBAR
&& prev
->subOp
>= max
) {
447 delete_Instruction(prog
, prev
);
452 if (isTextureOp(i
->op
)) {
465 NVC0LegalizePostRA::visit(Function
*fn
)
468 insertTextureBarriers(fn
);
470 rZero
= new_LValue(fn
, FILE_GPR
);
471 rZero
->reg
.data
.id
= prog
->getTarget()->getFileSize(FILE_GPR
);
476 NVC0LegalizePostRA::replaceZero(Instruction
*i
)
478 for (int s
= 0; i
->srcExists(s
); ++s
) {
479 ImmediateValue
*imm
= i
->getSrc(s
)->asImm();
480 if (imm
&& imm
->reg
.data
.u64
== 0)
486 NVC0LegalizePostRA::split64BitOp(Instruction
*i
)
488 if (i
->dType
== TYPE_F64
) {
491 if (i
->op
== OP_ADD
|| i
->op
== OP_MUL
|| i
->op
== OP_FMA
||
492 i
->op
== OP_CVT
|| i
->op
== OP_MIN
|| i
->op
== OP_MAX
||
495 i
->dType
= i
->sType
= TYPE_U32
;
497 i
->bb
->insertAfter(i
, cloneForward(func
, i
));
501 // replace CONT with BRA for single unconditional continue
503 NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock
*bb
)
505 if (bb
->cfg
.incidentCount() != 2 || bb
->getEntry()->op
!= OP_PRECONT
)
507 Graph::EdgeIterator ei
= bb
->cfg
.incident();
508 if (ei
.getType() != Graph::Edge::BACK
)
510 if (ei
.getType() != Graph::Edge::BACK
)
512 BasicBlock
*contBB
= BasicBlock::get(ei
.getNode());
514 if (!contBB
->getExit() || contBB
->getExit()->op
!= OP_CONT
||
515 contBB
->getExit()->getPredicate())
517 contBB
->getExit()->op
= OP_BRA
;
518 bb
->remove(bb
->getEntry()); // delete PRECONT
521 assert(ei
.end() || ei
.getType() != Graph::Edge::BACK
);
525 // replace branches to join blocks with join ops
527 NVC0LegalizePostRA::propagateJoin(BasicBlock
*bb
)
529 if (bb
->getEntry()->op
!= OP_JOIN
|| bb
->getEntry()->asFlow()->limit
)
531 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
532 BasicBlock
*in
= BasicBlock::get(ei
.getNode());
533 Instruction
*exit
= in
->getExit();
535 in
->insertTail(new FlowInstruction(func
, OP_JOIN
, bb
));
536 // there should always be a terminator instruction
537 WARN("inserted missing terminator in BB:%i\n", in
->getId());
539 if (exit
->op
== OP_BRA
) {
541 exit
->asFlow()->limit
= 1; // must-not-propagate marker
544 bb
->remove(bb
->getEntry());
548 NVC0LegalizePostRA::visit(BasicBlock
*bb
)
550 Instruction
*i
, *next
;
552 // remove pseudo operations and non-fixed no-ops, split 64 bit operations
553 for (i
= bb
->getFirst(); i
; i
= next
) {
555 if (i
->op
== OP_EMIT
|| i
->op
== OP_RESTART
) {
556 if (!i
->getDef(0)->refCount())
558 if (i
->src(0).getFile() == FILE_IMMEDIATE
)
559 i
->setSrc(0, rZero
); // initial value must be 0
564 if (i
->op
!= OP_MOV
&& i
->op
!= OP_PFETCH
)
566 if (typeSizeof(i
->dType
) == 8)
573 if (!tryReplaceContWithBra(bb
))
579 class NVC0LoweringPass
: public Pass
582 NVC0LoweringPass(Program
*);
585 virtual bool visit(Function
*);
586 virtual bool visit(BasicBlock
*);
587 virtual bool visit(Instruction
*);
589 bool handleRDSV(Instruction
*);
590 bool handleWRSV(Instruction
*);
591 bool handleEXPORT(Instruction
*);
592 bool handleOUT(Instruction
*);
593 bool handleDIV(Instruction
*);
594 bool handleMOD(Instruction
*);
595 bool handleSQRT(Instruction
*);
596 bool handlePOW(Instruction
*);
597 bool handleTEX(TexInstruction
*);
598 bool handleTXD(TexInstruction
*);
599 bool handleTXQ(TexInstruction
*);
600 bool handleManualTXD(TexInstruction
*);
602 void checkPredicate(Instruction
*);
604 void readTessCoord(LValue
*dst
, int c
);
607 const Target
*const targ
;
611 LValue
*gpEmitAddress
;
614 NVC0LoweringPass::NVC0LoweringPass(Program
*prog
) : targ(prog
->getTarget())
616 bld
.setProgram(prog
);
620 NVC0LoweringPass::visit(Function
*fn
)
622 if (prog
->getType() == Program::TYPE_GEOMETRY
) {
623 assert(!strncmp(fn
->getName(), "MAIN", 4));
624 // TODO: when we generate actual functions pass this value along somehow
625 bld
.setPosition(BasicBlock::get(fn
->cfg
.getRoot()), false);
626 gpEmitAddress
= bld
.loadImm(NULL
, 0)->asLValue();
628 bld
.setPosition(BasicBlock::get(fn
->cfgExit
)->getExit(), false);
629 bld
.mkMovToReg(0, gpEmitAddress
);
636 NVC0LoweringPass::visit(BasicBlock
*bb
)
641 // move array source to first slot, convert to u16, add indirections
643 NVC0LoweringPass::handleTEX(TexInstruction
*i
)
645 const int dim
= i
->tex
.target
.getDim() + i
->tex
.target
.isCube();
646 const int arg
= i
->tex
.target
.getArgCount();
648 if (prog
->getTarget()->getChipset() >= 0xe0) {
649 if (i
->tex
.r
== i
->tex
.s
) {
650 i
->tex
.r
+= 8; // NOTE: offset should probably be a driver option
651 i
->tex
.s
= 0; // only a single cX[] value possible here
653 // TODO: extract handles and use register to select TIC/TSC entries
655 if (i
->tex
.target
.isArray()) {
656 LValue
*layer
= new_LValue(func
, FILE_GPR
);
657 Value
*src
= i
->getSrc(arg
- 1);
658 const int sat
= (i
->op
== OP_TXF
) ? 1 : 0;
659 DataType sTy
= (i
->op
== OP_TXF
) ? TYPE_U32
: TYPE_F32
;
660 bld
.mkCvt(OP_CVT
, TYPE_U16
, layer
, sTy
, src
)->saturate
= sat
;
661 for (int s
= dim
; s
>= 1; --s
)
662 i
->setSrc(s
, i
->getSrc(s
- 1));
665 if (i
->tex
.rIndirectSrc
>= 0 || i
->tex
.sIndirectSrc
>= 0) {
668 Value
*rRel
= i
->getIndirectR();
669 Value
*sRel
= i
->getIndirectS();
670 Value
*shCnt
= bld
.loadImm(NULL
, 2);
673 tmp
[0] = bld
.getScratch();
674 bind
= bld
.mkSymbol(FILE_MEMORY_CONST
, 15, TYPE_U32
, i
->tex
.r
* 4);
675 bld
.mkOp2(OP_SHL
, TYPE_U32
, tmp
[0], rRel
, shCnt
);
676 tmp
[1] = bld
.mkLoad(TYPE_U32
, bind
, tmp
[0]);
677 bld
.mkOp2(OP_AND
, TYPE_U32
, tmp
[0], tmp
[1],
678 bld
.loadImm(tmp
[0], 0x00ffffffu
));
680 i
->setSrc(i
->tex
.rIndirectSrc
, NULL
);
683 tmp
[0] = bld
.getScratch();
684 bind
= bld
.mkSymbol(FILE_MEMORY_CONST
, 15, TYPE_U32
, i
->tex
.s
* 4);
685 bld
.mkOp2(OP_SHL
, TYPE_U32
, tmp
[0], sRel
, shCnt
);
686 tmp
[1] = bld
.mkLoad(TYPE_U32
, bind
, tmp
[0]);
687 bld
.mkOp2(OP_AND
, TYPE_U32
, tmp
[0], tmp
[1],
688 bld
.loadImm(tmp
[0], 0xff000000u
));
690 i
->setSrc(i
->tex
.sIndirectSrc
, NULL
);
692 bld
.mkOp2(OP_OR
, TYPE_U32
, rRel
, rRel
, sRel
);
694 int min
= i
->tex
.rIndirectSrc
;
695 if (min
< 0 || min
> i
->tex
.sIndirectSrc
)
696 min
= i
->tex
.sIndirectSrc
;
697 for (int s
= min
; s
>= 1; --s
)
698 i
->setSrc(s
, i
->getSrc(s
- 1));
702 // (nvc0) generate and move the tsc/tic/array source to the front
703 if (dim
!= arg
|| i
->tex
.rIndirectSrc
>= 0 || i
->tex
.sIndirectSrc
>= 0) {
704 LValue
*src
= new_LValue(func
, FILE_GPR
); // 0xttxsaaaa
706 Value
*arrayIndex
= i
->tex
.target
.isArray() ? i
->getSrc(arg
- 1) : NULL
;
707 for (int s
= dim
; s
>= 1; --s
)
708 i
->setSrc(s
, i
->getSrc(s
- 1));
709 i
->setSrc(0, arrayIndex
);
711 Value
*ticRel
= i
->getIndirectR();
712 Value
*tscRel
= i
->getIndirectS();
715 int sat
= (i
->op
== OP_TXF
) ? 1 : 0;
716 DataType sTy
= (i
->op
== OP_TXF
) ? TYPE_U32
: TYPE_F32
;
717 bld
.mkCvt(OP_CVT
, TYPE_U16
, src
, sTy
, arrayIndex
)->saturate
= sat
;
723 i
->setSrc(i
->tex
.rIndirectSrc
, NULL
);
724 bld
.mkOp3(OP_INSBF
, TYPE_U32
, src
, ticRel
, bld
.mkImm(0x0917), src
);
727 i
->setSrc(i
->tex
.sIndirectSrc
, NULL
);
728 bld
.mkOp3(OP_INSBF
, TYPE_U32
, src
, tscRel
, bld
.mkImm(0x0710), src
);
734 // offset is last source (lod 1st, dc 2nd)
735 if (i
->tex
.useOffsets
) {
738 int s
= i
->srcCount(0xff);
739 for (n
= 0; n
< i
->tex
.useOffsets
; ++n
)
740 for (c
= 0; c
< 3; ++c
)
741 value
|= (i
->tex
.offset
[n
][c
] & 0xf) << (n
* 12 + c
* 4);
742 i
->setSrc(s
, bld
.loadImm(NULL
, value
));
749 NVC0LoweringPass::handleManualTXD(TexInstruction
*i
)
751 static const uint8_t qOps
[4][2] =
753 { QUADOP(MOV2
, ADD
, MOV2
, ADD
), QUADOP(MOV2
, MOV2
, ADD
, ADD
) }, // l0
754 { QUADOP(SUBR
, MOV2
, SUBR
, MOV2
), QUADOP(MOV2
, MOV2
, ADD
, ADD
) }, // l1
755 { QUADOP(MOV2
, ADD
, MOV2
, ADD
), QUADOP(SUBR
, SUBR
, MOV2
, MOV2
) }, // l2
756 { QUADOP(SUBR
, MOV2
, SUBR
, MOV2
), QUADOP(SUBR
, SUBR
, MOV2
, MOV2
) }, // l3
761 Value
*zero
= bld
.loadImm(bld
.getSSA(), 0);
763 const int dim
= i
->tex
.target
.getDim();
765 i
->op
= OP_TEX
; // no need to clone dPdx/dPdy later
767 for (c
= 0; c
< dim
; ++c
)
768 crd
[c
] = bld
.getScratch();
770 bld
.mkOp(OP_QUADON
, TYPE_NONE
, NULL
);
771 for (l
= 0; l
< 4; ++l
) {
772 // mov coordinates from lane l to all lanes
773 for (c
= 0; c
< dim
; ++c
)
774 bld
.mkQuadop(0x00, crd
[c
], l
, i
->getSrc(c
), zero
);
775 // add dPdx from lane l to lanes dx
776 for (c
= 0; c
< dim
; ++c
)
777 bld
.mkQuadop(qOps
[l
][0], crd
[c
], l
, i
->dPdx
[c
].get(), crd
[c
]);
778 // add dPdy from lane l to lanes dy
779 for (c
= 0; c
< dim
; ++c
)
780 bld
.mkQuadop(qOps
[l
][1], crd
[c
], l
, i
->dPdy
[c
].get(), crd
[c
]);
782 bld
.insert(tex
= cloneForward(func
, i
));
783 for (c
= 0; c
< dim
; ++c
)
784 tex
->setSrc(c
, crd
[c
]);
786 for (c
= 0; i
->defExists(c
); ++c
) {
788 def
[c
][l
] = bld
.getSSA();
789 mov
= bld
.mkMov(def
[c
][l
], tex
->getDef(c
));
794 bld
.mkOp(OP_QUADPOP
, TYPE_NONE
, NULL
);
796 for (c
= 0; i
->defExists(c
); ++c
) {
797 Instruction
*u
= bld
.mkOp(OP_UNION
, TYPE_U32
, i
->getDef(c
));
798 for (l
= 0; l
< 4; ++l
)
799 u
->setSrc(l
, def
[c
][l
]);
807 NVC0LoweringPass::handleTXD(TexInstruction
*txd
)
809 int dim
= txd
->tex
.target
.getDim();
810 int arg
= txd
->tex
.target
.getArgCount();
813 while (txd
->srcExists(arg
))
816 txd
->tex
.derivAll
= true;
818 txd
->tex
.target
.isCube() ||
820 txd
->tex
.target
.isShadow())
821 return handleManualTXD(txd
);
823 for (int c
= 0; c
< dim
; ++c
) {
824 txd
->setSrc(arg
+ c
* 2 + 0, txd
->dPdx
[c
]);
825 txd
->setSrc(arg
+ c
* 2 + 1, txd
->dPdy
[c
]);
826 txd
->dPdx
[c
].set(NULL
);
827 txd
->dPdy
[c
].set(NULL
);
833 NVC0LoweringPass::handleTXQ(TexInstruction
*txq
)
835 // TODO: indirect resource/sampler index
840 NVC0LoweringPass::handleWRSV(Instruction
*i
)
846 // must replace, $sreg are not writeable
847 addr
= targ
->getSVAddress(FILE_SHADER_OUTPUT
, i
->getSrc(0)->asSym());
850 sym
= bld
.mkSymbol(FILE_SHADER_OUTPUT
, 0, i
->sType
, addr
);
852 st
= bld
.mkStore(OP_EXPORT
, i
->dType
, sym
, i
->getIndirect(0, 0),
854 st
->perPatch
= i
->perPatch
;
856 bld
.getBB()->remove(i
);
861 NVC0LoweringPass::readTessCoord(LValue
*dst
, int c
)
863 Value
*laneid
= bld
.getSSA();
866 bld
.mkOp1(OP_RDSV
, TYPE_U32
, laneid
, bld
.mkSysVal(SV_LANEID
, 0));
881 bld
.mkFetch(x
, TYPE_F32
, FILE_SHADER_OUTPUT
, 0x2f0, NULL
, laneid
);
883 bld
.mkFetch(y
, TYPE_F32
, FILE_SHADER_OUTPUT
, 0x2f4, NULL
, laneid
);
886 bld
.mkOp2(OP_ADD
, TYPE_F32
, dst
, x
, y
);
887 bld
.mkOp2(OP_SUB
, TYPE_F32
, dst
, bld
.loadImm(NULL
, 1.0f
), dst
);
892 NVC0LoweringPass::handleRDSV(Instruction
*i
)
894 Symbol
*sym
= i
->getSrc(0)->asSym();
897 uint32_t addr
= targ
->getSVAddress(FILE_SHADER_INPUT
, sym
);
899 if (addr
>= 0x400) // mov $sreg
902 switch (i
->getSrc(0)->reg
.data
.sv
.sv
) {
904 assert(prog
->getType() == Program::TYPE_FRAGMENT
);
905 bld
.mkInterp(NV50_IR_INTERP_LINEAR
, i
->getDef(0), addr
, NULL
);
909 Value
*face
= i
->getDef(0);
910 bld
.mkInterp(NV50_IR_INTERP_FLAT
, face
, addr
, NULL
);
911 if (i
->dType
== TYPE_F32
) {
912 bld
.mkOp2(OP_AND
, TYPE_U32
, face
, face
, bld
.mkImm(0x80000000));
913 bld
.mkOp2(OP_XOR
, TYPE_U32
, face
, face
, bld
.mkImm(0xbf800000));
918 assert(prog
->getType() == Program::TYPE_TESSELLATION_EVAL
);
919 readTessCoord(i
->getDef(0)->asLValue(), i
->getSrc(0)->reg
.data
.sv
.index
);
922 if (prog
->getType() == Program::TYPE_TESSELLATION_EVAL
)
923 vtx
= bld
.mkOp1v(OP_PFETCH
, TYPE_U32
, bld
.getSSA(), bld
.mkImm(0));
924 ld
= bld
.mkFetch(i
->getDef(0), i
->dType
,
925 FILE_SHADER_INPUT
, addr
, i
->getIndirect(0, 0), vtx
);
926 ld
->perPatch
= i
->perPatch
;
929 bld
.getBB()->remove(i
);
934 NVC0LoweringPass::handleDIV(Instruction
*i
)
936 if (!isFloatType(i
->dType
))
938 bld
.setPosition(i
, false);
939 Instruction
*rcp
= bld
.mkOp1(OP_RCP
, i
->dType
, bld
.getSSA(), i
->getSrc(1));
941 i
->setSrc(1, rcp
->getDef(0));
946 NVC0LoweringPass::handleMOD(Instruction
*i
)
948 if (i
->dType
!= TYPE_F32
)
950 LValue
*value
= bld
.getScratch();
951 bld
.mkOp1(OP_RCP
, TYPE_F32
, value
, i
->getSrc(1));
952 bld
.mkOp2(OP_MUL
, TYPE_F32
, value
, i
->getSrc(0), value
);
953 bld
.mkOp1(OP_TRUNC
, TYPE_F32
, value
, value
);
954 bld
.mkOp2(OP_MUL
, TYPE_F32
, value
, i
->getSrc(1), value
);
961 NVC0LoweringPass::handleSQRT(Instruction
*i
)
963 Instruction
*rsq
= bld
.mkOp1(OP_RSQ
, TYPE_F32
,
964 bld
.getSSA(), i
->getSrc(0));
966 i
->setSrc(1, rsq
->getDef(0));
972 NVC0LoweringPass::handlePOW(Instruction
*i
)
974 LValue
*val
= bld
.getScratch();
976 bld
.mkOp1(OP_LG2
, TYPE_F32
, val
, i
->getSrc(0));
977 bld
.mkOp2(OP_MUL
, TYPE_F32
, val
, i
->getSrc(1), val
)->dnz
= 1;
978 bld
.mkOp1(OP_PREEX2
, TYPE_F32
, val
, val
);
988 NVC0LoweringPass::handleEXPORT(Instruction
*i
)
990 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
991 int id
= i
->getSrc(0)->reg
.data
.offset
/ 4;
993 if (i
->src(0).isIndirect(0)) // TODO, ugly
996 i
->subOp
= NV50_IR_SUBOP_MOV_FINAL
;
997 i
->src(0).set(i
->src(1));
999 i
->setDef(0, new_LValue(func
, FILE_GPR
));
1000 i
->getDef(0)->reg
.data
.id
= id
;
1002 prog
->maxGPR
= MAX2(prog
->maxGPR
, id
);
1004 if (prog
->getType() == Program::TYPE_GEOMETRY
) {
1005 i
->setIndirect(0, 1, gpEmitAddress
);
1011 NVC0LoweringPass::handleOUT(Instruction
*i
)
1013 if (i
->op
== OP_RESTART
&& i
->prev
&& i
->prev
->op
== OP_EMIT
) {
1014 i
->prev
->subOp
= NV50_IR_SUBOP_EMIT_RESTART
;
1015 delete_Instruction(prog
, i
);
1017 assert(gpEmitAddress
);
1018 i
->setDef(0, gpEmitAddress
);
1019 if (i
->srcExists(0))
1020 i
->setSrc(1, i
->getSrc(0));
1021 i
->setSrc(0, gpEmitAddress
);
1026 // Generate a binary predicate if an instruction is predicated by
1027 // e.g. an f32 value.
1029 NVC0LoweringPass::checkPredicate(Instruction
*insn
)
1031 Value
*pred
= insn
->getPredicate();
1034 if (!pred
|| pred
->reg
.file
== FILE_PREDICATE
)
1036 pdst
= new_LValue(func
, FILE_PREDICATE
);
1038 // CAUTION: don't use pdst->getInsn, the definition might not be unique,
1039 // delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
1041 bld
.mkCmp(OP_SET
, CC_NEU
, TYPE_U32
, pdst
, bld
.mkImm(0), pred
);
1043 insn
->setPredicate(insn
->cc
, pdst
);
1047 // - add quadop dance for texturing
1048 // - put FP outputs in GPRs
1049 // - convert instruction sequences
1052 NVC0LoweringPass::visit(Instruction
*i
)
1054 bld
.setPosition(i
, false);
1056 if (i
->cc
!= CC_ALWAYS
)
1065 return handleTEX(i
->asTex());
1067 return handleTXD(i
->asTex());
1069 return handleTXQ(i
->asTex());
1071 bld
.mkOp1(OP_PREEX2
, TYPE_F32
, i
->getDef(0), i
->getSrc(0));
1072 i
->setSrc(0, i
->getDef(0));
1075 return handlePOW(i
);
1077 return handleDIV(i
);
1079 return handleMOD(i
);
1081 return handleSQRT(i
);
1083 return handleEXPORT(i
);
1086 return handleOUT(i
);
1088 return handleRDSV(i
);
1090 return handleWRSV(i
);
1092 if (i
->src(0).getFile() == FILE_SHADER_INPUT
) {
1094 assert(prog
->getType() != Program::TYPE_FRAGMENT
);
1104 TargetNVC0::runLegalizePass(Program
*prog
, CGStage stage
) const
1106 if (stage
== CG_STAGE_PRE_SSA
) {
1107 NVC0LoweringPass
pass(prog
);
1108 return pass
.run(prog
, false, true);
1110 if (stage
== CG_STAGE_POST_RA
) {
1111 NVC0LegalizePostRA
pass(prog
);
1112 return pass
.run(prog
, false, true);
1114 if (stage
== CG_STAGE_SSA
) {
1115 NVC0LegalizeSSA pass
;
1116 return pass
.run(prog
, false, true);
1121 } // namespace nv50_ir