2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 #include "nv50/codegen/nv50_ir.h"
24 #include "nv50/codegen/nv50_ir_build_util.h"
26 #include "nv50_ir_target_nvc0.h"
// Packs four quad-op selectors into a single value. Each argument is
// token-pasted onto the QOP_ prefix (pass the bare suffix, e.g. ADD or MOV2),
// and the resulting constants are shifted left by 0, 2, 4 and 6 bits
// respectively before being OR-ed together.
35 #define QUADOP(q, r, s, t) \
36 ((QOP_##q << 0) | (QOP_##r << 2) | \
37 (QOP_##s << 4) | (QOP_##t << 6))
// Pass that overrides visit() for basic blocks and functions and declares
// two helpers: one for integer division/modulus and one for double precision
// reciprocal / reciprocal square root.
39 class NVC0LegalizeSSA
: public Pass
42 virtual bool visit(BasicBlock
*);
43 virtual bool visit(Function
*);
45 // we want to insert calls to the builtin library only after optimization
46 void handleDIV(Instruction
*); // integer division, modulus
47 void handleRCPRSQ(Instruction
*); // double precision float recip/rsqrt
// Lowers the integer division/modulus instruction `i` to a call into the
// builtin library: both sources are staged with mkMovToReg, an OP_CALL with
// CC_ALWAYS is emitted, the result is copied into i's definition, and `i`
// itself is deleted.
54 NVC0LegalizeSSA::handleDIV(Instruction
*i
)
56 FlowInstruction
*call
;
60 bld
.setPosition(i
, false);
// Stage the two operands; def[0] and def[1] are the values defined by the
// moves. NOTE(review): the first mkMovToReg argument is presumably the
// destination register id - confirm.
61 def
[0] = bld
.mkMovToReg(0, i
->getSrc(0))->getDef(0);
62 def
[1] = bld
.mkMovToReg(1, i
->getSrc(1))->getDef(0);
// Select the builtin by type (unsigned vs. signed 32 bit).
64 case TYPE_U32
: builtin
= NVC0_BUILTIN_DIV_U32
; break;
65 case TYPE_S32
: builtin
= NVC0_BUILTIN_DIV_S32
; break;
69 call
= bld
.mkFlow(OP_CALL
, NULL
, CC_ALWAYS
, NULL
);
// OP_DIV takes its result from def[0]; any other opcode takes it from def[1].
70 bld
.mkMov(i
->getDef(0), def
[(i
->op
== OP_DIV
) ? 0 : 1]);
// The clobber masks depend on the opcode (GPR file) and on the signedness
// (predicate file). NOTE(review): presumably these describe registers the
// builtin overwrites - confirm against the builtin library.
71 bld
.mkClobber(FILE_GPR
, (i
->op
== OP_DIV
) ? 0xe : 0xd, 2);
72 bld
.mkClobber(FILE_PREDICATE
, (i
->dType
== TYPE_S32
) ? 0xf : 0x3, 0);
// Flag the call as an absolute call to a builtin and record which builtin.
75 call
->absolute
= call
->builtin
= 1;
76 call
->target
.builtin
= builtin
;
77 delete_Instruction(prog
, i
);
// Handler for double precision float recip/rsqrt (as described at its
// declaration). NOTE(review): the body is not visible in this listing.
81 NVC0LegalizeSSA::handleRCPRSQ(Instruction
*i
)
// Per-function hook: binds the builder to the program that owns `fn`.
87 NVC0LegalizeSSA::visit(Function
*fn
)
89 bld
.setProgram(fn
->getProgram());
// Per-block hook: iterates over the instructions of `bb`, starting at
// bb->getEntry() and advancing through `next`, and tests each instruction's
// dType against TYPE_F32 and TYPE_F64.
// NOTE(review): the statements guarded by those tests are not visible in
// this listing; presumably they dispatch to the handlers declared in the
// class - confirm.
94 NVC0LegalizeSSA::visit(BasicBlock
*bb
)
97 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
99 if (i
->dType
== TYPE_F32
)
108 if (i
->dType
== TYPE_F64
)
// Pass that overrides visit() for functions and basic blocks and declares
// four helpers: replaceZero, split64BitOp, tryReplaceContWithBra and
// propagateJoin.
118 class NVC0LegalizePostRA
: public Pass
121 virtual bool visit(Function
*);
122 virtual bool visit(BasicBlock
*);
124 void replaceZero(Instruction
*);
125 void split64BitOp(Instruction
*);
126 bool tryReplaceContWithBra(BasicBlock
*);
127 void propagateJoin(BasicBlock
*);
// Per-function hook: allocates a FILE_GPR value for `fn`, keeps it in r63 and
// sets its register id to 63. visit(BasicBlock) substitutes r63 for an
// immediate source where "initial value must be 0".
133 NVC0LegalizePostRA::visit(Function
*fn
)
135 r63
= new_LValue(fn
, FILE_GPR
);
136 r63
->reg
.data
.id
= 63;
// Scans every existing source of `i` and tests whether it is an immediate
// whose 64 bit payload equals zero.
// NOTE(review): the action taken on a match is not visible in this listing.
141 NVC0LegalizePostRA::replaceZero(Instruction
*i
)
143 for (int s
= 0; i
->srcExists(s
); ++s
) {
// asImm() yields NULL for non-immediate sources, hence the null test below.
144 ImmediateValue
*imm
= i
->getSrc(s
)->asImm();
145 if (imm
&& imm
->reg
.data
.u64
== 0)
// Splits a 64 bit operation. Visible steps: a TYPE_F64 test combined with an
// opcode list (ADD, MUL, FMA, CVT, MIN, MAX, ...), retyping of the
// instruction to TYPE_U32 (both dType and sType), and insertion of a deep
// clone of the instruction directly after it in its basic block.
// NOTE(review): how the TYPE_F64 test gates the remaining steps is not
// visible in this listing.
151 NVC0LegalizePostRA::split64BitOp(Instruction
*i
)
153 if (i
->dType
== TYPE_F64
) {
156 if (i
->op
== OP_ADD
|| i
->op
== OP_MUL
|| i
->op
== OP_FMA
||
157 i
->op
== OP_CVT
|| i
->op
== OP_MIN
|| i
->op
== OP_MAX
||
160 i
->dType
= i
->sType
= TYPE_U32
;
162 i
->bb
->insertAfter(i
, i
->clone(true)); // deep cloning
166 // replace CONT with BRA for single unconditional continue
// The guard tests that `bb` has exactly two incident edges and starts with
// OP_PRECONT. The incident edge of type BACK supplies the continuing block;
// its exit instruction must exist, be OP_CONT and carry no predicate. That
// exit is rewritten to OP_BRA and the OP_PRECONT at the head of `bb` is
// removed.
168 NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock
*bb
)
170 if (bb
->cfg
.incidentCount() != 2 || bb
->getEntry()->op
!= OP_PRECONT
)
172 Graph::EdgeIterator ei
= bb
->cfg
.incident();
// NOTE(review): the edge type is tested twice; presumably the iterator is
// advanced between the two tests on a line not visible here - confirm.
173 if (ei
.getType() != Graph::Edge::BACK
)
175 if (ei
.getType() != Graph::Edge::BACK
)
177 BasicBlock
*contBB
= BasicBlock::get(ei
.getNode());
179 if (!contBB
->getExit() || contBB
->getExit()->op
!= OP_CONT
||
180 contBB
->getExit()->getPredicate())
182 contBB
->getExit()->op
= OP_BRA
;
183 bb
->remove(bb
->getEntry()); // delete PRECONT
// Sanity check: either no edge remains or the remaining one is not a BACK edge.
186 assert(ei
.end() || ei
.getType() != Graph::Edge::BACK
);
190 // replace branches to join blocks with join ops
// The guard tests that the entry of `bb` is OP_JOIN and that its flow `limit`
// field is unset. Every block with an edge into `bb` is then visited and its
// exit instruction examined; finally the entry instruction of `bb` is
// removed.
192 NVC0LegalizePostRA::propagateJoin(BasicBlock
*bb
)
194 if (bb
->getEntry()->op
!= OP_JOIN
|| bb
->getEntry()->asFlow()->limit
)
196 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
197 BasicBlock
*in
= BasicBlock::get(ei
.getNode());
198 Instruction
*exit
= in
->getExit();
// NOTE(review): presumably executed only when `exit` is NULL (the guarding
// condition is not visible here): a JOIN targeting `bb` is appended and a
// warning is logged.
200 in
->insertTail(new FlowInstruction(func
, OP_JOIN
, bb
));
201 // there should always be a terminator instruction
202 WARN("inserted missing terminator in BB:%i\n", in
->getId());
204 if (exit
->op
== OP_BRA
) {
206 exit
->asFlow()->limit
= 1; // must-not-propagate marker
209 bb
->remove(bb
->getEntry());
// Per-block hook: walks all instructions of `bb` from getFirst(), advancing
// through `next`, and afterwards attempts tryReplaceContWithBra on the block.
// NOTE(review): several guarded statements are not visible in this listing,
// so only the tests themselves are described below.
213 NVC0LegalizePostRA::visit(BasicBlock
*bb
)
215 Instruction
*i
, *next
;
217 // remove pseudo operations and non-fixed no-ops, split 64 bit operations
218 for (i
= bb
->getFirst(); i
; i
= next
) {
// OP_EMIT / OP_RESTART: test whether definition 0 is unreferenced, and
// replace an immediate source 0 by r63.
220 if (i
->op
== OP_EMIT
|| i
->op
== OP_RESTART
) {
221 if (!i
->getDef(0)->refCount())
223 if (i
->src
[0].getFile() == FILE_IMMEDIATE
)
224 i
->setSrc(0, r63
); // initial value must be 0
229 if (i
->op
!= OP_MOV
&& i
->op
!= OP_PFETCH
)
// 8 byte result types are the 64 bit operations mentioned above.
231 if (typeSizeof(i
->dType
) == 8)
238 if (!tryReplaceContWithBra(bb
))
// Pass constructed from a Program. It overrides visit() for functions, basic
// blocks and instructions and declares one handler per lowered operation
// (RDSV, WRSV, EXPORT, OUT, DIV, MOD, SQRT, POW, TEX, TXD), plus the helpers
// checkPredicate and readTessCoord. It keeps the target description (`targ`)
// and `gpEmitAddress`, which visit(Function) initialises for geometry
// programs.
244 class NVC0LoweringPass
: public Pass
247 NVC0LoweringPass(Program
*);
250 virtual bool visit(Function
*);
251 virtual bool visit(BasicBlock
*);
252 virtual bool visit(Instruction
*);
254 bool handleRDSV(Instruction
*);
255 bool handleWRSV(Instruction
*);
256 bool handleEXPORT(Instruction
*);
257 bool handleOUT(Instruction
*);
258 bool handleDIV(Instruction
*);
259 bool handleMOD(Instruction
*);
260 bool handleSQRT(Instruction
*);
261 bool handlePOW(Instruction
*);
262 bool handleTEX(TexInstruction
*);
263 bool handleTXD(TexInstruction
*);
264 bool handleManualTXD(TexInstruction
*);
266 void checkPredicate(Instruction
*);
268 void readTessCoord(LValue
*dst
, int c
);
271 const Target
*const targ
;
275 LValue
*gpEmitAddress
;
// Caches the target of `prog` in `targ` and binds the builder to `prog`.
278 NVC0LoweringPass::NVC0LoweringPass(Program
*prog
) : targ(prog
->getTarget())
280 bld
.setProgram(prog
);
// Per-function hook. For geometry programs the function name must start with
// "MAIN"; the builder is positioned at the root basic block of the function
// and gpEmitAddress is initialised with a loaded immediate 0.
284 NVC0LoweringPass::visit(Function
*fn
)
286 if (prog
->getType() == Program::TYPE_GEOMETRY
) {
287 assert(!strncmp(fn
->getName(), "MAIN", 4));
288 // TODO: when we generate actual functions pass this value along somehow
289 bld
.setPosition(BasicBlock::get(fn
->cfg
.getRoot()), false);
290 gpEmitAddress
= bld
.loadImm(NULL
, 0)->asLValue();
// Per-block hook. NOTE(review): the body is not visible in this listing.
296 NVC0LoweringPass::visit(BasicBlock
*bb
)
301 // move array source to first slot, convert to u16, add indirections
// `dim` is the coordinate count of the texture target; `arg` additionally
// counts the array index (getDim() + isArray()). When the target is an array
// or either indirect source index is >= 0, a combined GPR value `src` is
// built. Texel offsets, if used, are packed into one immediate and appended
// as an extra source.
303 NVC0LoweringPass::handleTEX(TexInstruction
*i
)
305 const int dim
= i
->tex
.target
.getDim();
306 const int arg
= i
->tex
.target
.getDim() + i
->tex
.target
.isArray();
308 // generate and move the tsc/tic/array source to the front
309 if (dim
!= arg
|| i
->tex
.rIndirectSrc
>= 0 || i
->tex
.sIndirectSrc
>= 0) {
310 LValue
*src
= new_LValue(func
, FILE_GPR
); // 0xttxsaaaa
312 Value
*arrayIndex
= i
->tex
.target
.isArray() ? i
->getSrc(dim
) : NULL
;
// Shift sources 0..dim-1 up by one slot (walking downwards so nothing is
// overwritten early), then put the array index into slot 0.
313 for (int s
= dim
; s
>= 1; --s
)
314 i
->setSrc(s
, i
->getSrc(s
- 1));
315 i
->setSrc(0, arrayIndex
);
317 Value
*ticRel
= i
->getIndirectR();
318 Value
*tscRel
= i
->getIndirectS();
// Convert the array index from f32 to u16 into `src`.
321 bld
.mkCvt(OP_CVT
, TYPE_U16
, src
, TYPE_F32
, arrayIndex
);
// Clear the indirect source slots and merge ticRel / tscRel into `src` with
// OP_INSBF. NOTE(review): the immediates 0x0917 and 0x0710 presumably encode
// the bitfield position/size - confirm against the OP_INSBF definition.
326 i
->setSrc(i
->tex
.rIndirectSrc
, NULL
);
327 bld
.mkOp3(OP_INSBF
, TYPE_U32
, src
, ticRel
, bld
.mkImm(0x0917), src
);
330 i
->setSrc(i
->tex
.sIndirectSrc
, NULL
);
331 bld
.mkOp3(OP_INSBF
, TYPE_U32
, src
, tscRel
, bld
.mkImm(0x0710), src
);
337 // offset is last source (lod 1st, dc 2nd)
338 if (i
->tex
.useOffsets
) {
// `s` is the index at which the packed offsets are stored as a new source.
341 int s
= i
->srcCount(0xff);
// Pack the low 4 bits of each of the 3 components of every offset entry into
// `value`, at bit position n * 12 + c * 4.
342 for (n
= 0; n
< i
->tex
.useOffsets
; ++n
)
343 for (c
= 0; c
< 3; ++c
)
344 value
|= (i
->tex
.offset
[n
][c
] & 0xf) << (n
* 12 + c
* 4);
345 i
->setSrc(s
, bld
.loadImm(NULL
, value
));
// Emulates a texture fetch with explicit derivatives using quad operations.
// The instruction is turned into OP_TEX. Between OP_QUADON and OP_QUADPOP,
// for each of the 4 lanes: the coordinates of lane l are moved to all lanes,
// dPdx and dPdy are combined in using the quad-op patterns from qOps, a deep
// clone of the texture instruction is inserted with the adjusted coordinates,
// and each of its results is moved into a per-lane SSA value. Finally one
// OP_UNION per definition takes the four per-lane values as sources and
// writes the original definition.
352 NVC0LoweringPass::handleManualTXD(TexInstruction
*i
)
// One row per lane l; column 0 is used with dPdx, column 1 with dPdy.
354 static const uint8_t qOps
[4][2] =
356 { QUADOP(MOV2
, ADD
, MOV2
, ADD
), QUADOP(MOV2
, MOV2
, ADD
, ADD
) }, // l0
357 { QUADOP(SUBR
, MOV2
, SUBR
, MOV2
), QUADOP(MOV2
, MOV2
, ADD
, ADD
) }, // l1
358 { QUADOP(MOV2
, ADD
, MOV2
, ADD
), QUADOP(SUBR
, SUBR
, MOV2
, MOV2
) }, // l2
359 { QUADOP(SUBR
, MOV2
, SUBR
, MOV2
), QUADOP(SUBR
, SUBR
, MOV2
, MOV2
) }, // l3
364 Value
*zero
= bld
.loadImm(bld
.getSSA(), 0);
366 const int dim
= i
->tex
.target
.getDim();
368 i
->op
= OP_TEX
; // no need to clone dPdx/dPdy later
// One scratch value per coordinate, reused for every lane.
370 for (c
= 0; c
< dim
; ++c
)
371 crd
[c
] = bld
.getScratch();
373 bld
.mkOp(OP_QUADON
, TYPE_NONE
, NULL
);
374 for (l
= 0; l
< 4; ++l
) {
375 // mov coordinates from lane l to all lanes
376 for (c
= 0; c
< dim
; ++c
)
377 bld
.mkQuadop(0x00, crd
[c
], l
, i
->getSrc(c
), zero
);
378 // add dPdx from lane l to lanes dx
379 for (c
= 0; c
< dim
; ++c
)
380 bld
.mkQuadop(qOps
[l
][0], crd
[c
], l
, i
->dPdx
[c
].get(), crd
[c
]);
381 // add dPdy from lane l to lanes dy
382 for (c
= 0; c
< dim
; ++c
)
383 bld
.mkQuadop(qOps
[l
][1], crd
[c
], l
, i
->dPdy
[c
].get(), crd
[c
]);
// Insert a deep clone of the texture instruction that reads the adjusted
// coordinates.
385 bld
.insert(tex
= i
->clone(true));
386 for (c
= 0; c
< dim
; ++c
)
387 tex
->setSrc(c
, crd
[c
]);
// Save each result of the clone in a fresh SSA value for lane l.
389 for (c
= 0; i
->defExists(c
); ++c
) {
391 def
[c
][l
] = bld
.getSSA();
392 mov
= bld
.mkMov(def
[c
][l
], tex
->getDef(c
));
397 bld
.mkOp(OP_QUADPOP
, TYPE_NONE
, NULL
);
// One OP_UNION per definition, taking the four per-lane values as sources.
399 for (c
= 0; i
->defExists(c
); ++c
) {
400 Instruction
*u
= bld
.mkOp(OP_UNION
, TYPE_U32
, i
->getDef(c
));
401 for (l
= 0; l
< 4; ++l
)
402 u
->setSrc(l
, def
[c
][l
]);
// Lowers a texture fetch with explicit derivatives. Targets with more than
// two dimensions and shadow targets are delegated to handleManualTXD.
// Otherwise dPdx/dPdy of every coordinate are stored as extra sources,
// interleaved, starting at index `arg` (dPdx at arg + 2c, dPdy at
// arg + 2c + 1).
410 NVC0LoweringPass::handleTXD(TexInstruction
*txd
)
412 int dim
= txd
->tex
.target
.getDim();
413 int arg
= txd
->tex
.target
.getDim() + txd
->tex
.target
.isArray();
// NOTE(review): the statement guarded by this test is not visible here.
416 if (txd
->src
[arg
].exists())
419 if (dim
> 2 || txd
->tex
.target
.isShadow())
420 return handleManualTXD(txd
);
422 // at most s/t/array, x, y, offset
423 assert(arg
<= 4 && !txd
->src
[arg
].exists());
425 for (int c
= 0; c
< dim
; ++c
) {
426 txd
->src
[arg
+ c
* 2 + 0].set(txd
->dPdx
[c
]);
427 txd
->src
[arg
+ c
* 2 + 1].set(txd
->dPdy
[c
]);
// Replaces a system value write by an OP_EXPORT store: the shader output
// address of the system value is looked up from the target, a symbol at that
// address is created, the store is emitted with the same indirect source and
// perPatch flag as `i`, and `i` is removed from its basic block.
435 NVC0LoweringPass::handleWRSV(Instruction
*i
)
441 // must replace, $sreg are not writeable
442 addr
= targ
->getSVAddress(FILE_SHADER_OUTPUT
, i
->getSrc(0)->asSym());
445 sym
= bld
.mkSymbol(FILE_SHADER_OUTPUT
, 0, i
->sType
, addr
);
447 st
= bld
.mkStore(OP_EXPORT
, i
->dType
, sym
, i
->getIndirect(0, 0),
449 st
->perPatch
= i
->perPatch
;
451 bld
.getBB()->remove(i
);
// Reads a tessellation coordinate into `dst`. Visible operations: the lane id
// is read through OP_RDSV of SV_LANEID; x and y are fetched as f32 from
// FILE_SHADER_OUTPUT at addresses 0x2f0 and 0x2f4 with `laneid` passed as the
// last mkFetch argument; dst is computed as x + y and then as 1.0f - dst.
// NOTE(review): how the component index `c` selects among these operations is
// not visible in this listing.
456 NVC0LoweringPass::readTessCoord(LValue
*dst
, int c
)
458 Value
*laneid
= bld
.getSSA();
461 bld
.mkOp1(OP_RDSV
, TYPE_U32
, laneid
, bld
.mkSysVal(SV_LANEID
, 0));
476 bld
.mkFetch(x
, TYPE_F32
, FILE_SHADER_OUTPUT
, 0x2f0, NULL
, laneid
);
478 bld
.mkFetch(y
, TYPE_F32
, FILE_SHADER_OUTPUT
, 0x2f4, NULL
, laneid
);
481 bld
.mkOp2(OP_ADD
, TYPE_F32
, dst
, x
, y
);
482 bld
.mkOp2(OP_SUB
, TYPE_F32
, dst
, bld
.loadImm(NULL
, 1.0f
), dst
);
// Lowers a system value read. The shader input address of the system value is
// obtained from the target; addresses >= 0x400 are treated as special
// register moves ("mov $sreg"). Otherwise the system value kind selects the
// replacement: an OP_LINTERP load (fragment programs only), a call to
// readTessCoord (tessellation evaluation programs only), or a generic fetch
// from FILE_SHADER_INPUT. Afterwards `i` is removed from its basic block.
// NOTE(review): the case labels of the switch are not visible in this
// listing.
487 NVC0LoweringPass::handleRDSV(Instruction
*i
)
489 Symbol
*sym
= i
->getSrc(0)->asSym();
492 uint32_t addr
= targ
->getSVAddress(FILE_SHADER_INPUT
, sym
);
494 if (addr
>= 0x400) // mov $sreg
497 switch (i
->getSrc(0)->reg
.data
.sv
.sv
) {
// Linearly interpolated f32 input at `addr`, inserted right after `i`.
499 assert(prog
->getType() == Program::TYPE_FRAGMENT
);
500 ld
= new_Instruction(func
, OP_LINTERP
, TYPE_F32
);
501 ld
->setDef(0, i
->getDef(0));
502 ld
->setSrc(0, bld
.mkSymbol(FILE_SHADER_INPUT
, 0, TYPE_F32
, addr
));
503 ld
->setInterpolate(NV50_IR_INTERP_LINEAR
);
504 bld
.getBB()->insertAfter(i
, ld
);
// Tessellation coordinate: the system value index selects the component.
507 assert(prog
->getType() == Program::TYPE_TESSELLATION_EVAL
);
508 readTessCoord(i
->getDef(0)->asLValue(), i
->getSrc(0)->reg
.data
.sv
.index
);
// Generic fetch; for tessellation evaluation programs an OP_PFETCH result is
// passed along as `vtx`.
511 if (prog
->getType() == Program::TYPE_TESSELLATION_EVAL
)
512 vtx
= bld
.mkOp1v(OP_PFETCH
, TYPE_U32
, bld
.getSSA(), bld
.mkImm(0));
513 ld
= bld
.mkFetch(i
->getDef(0), i
->dType
,
514 FILE_SHADER_INPUT
, addr
, i
->getIndirect(0, 0), vtx
);
515 ld
->perPatch
= i
->perPatch
;
518 bld
.getBB()->remove(i
);
// Lowers a division: tests whether the type is a float type, computes the
// reciprocal of source 1 into a fresh SSA value and installs it as the new
// source 1.
// NOTE(review): the statement guarded by the float-type test and any opcode
// change are not visible in this listing.
523 NVC0LoweringPass::handleDIV(Instruction
*i
)
525 if (!isFloatType(i
->dType
))
527 Instruction
*rcp
= bld
.mkOp1(OP_RCP
, i
->dType
, bld
.getSSA(), i
->getSrc(1));
529 i
->setSrc(1, rcp
->getDef(0));
// Lowers an f32 modulus. Visible steps, all on one scratch value:
//   value = rcp(src1); value = src0 * value; value = trunc(value);
//   value = src1 * value
// i.e. src1 * trunc(src0 / src1).
// NOTE(review): the statement guarded by the type test and the final use of
// `value` are not visible in this listing.
534 NVC0LoweringPass::handleMOD(Instruction
*i
)
536 if (i
->dType
!= TYPE_F32
)
538 LValue
*value
= bld
.getScratch();
539 bld
.mkOp1(OP_RCP
, TYPE_F32
, value
, i
->getSrc(1));
540 bld
.mkOp2(OP_MUL
, TYPE_F32
, value
, i
->getSrc(0), value
);
541 bld
.mkOp1(OP_TRUNC
, TYPE_F32
, value
, value
);
542 bld
.mkOp2(OP_MUL
, TYPE_F32
, value
, i
->getSrc(1), value
);
// Lowers a square root: computes the f32 reciprocal square root of source 0
// into a fresh SSA value and installs it as source 1 of `i`.
// NOTE(review): any opcode change on `i` is not visible in this listing.
549 NVC0LoweringPass::handleSQRT(Instruction
*i
)
551 Instruction
*rsq
= bld
.mkOp1(OP_RSQ
, TYPE_F32
,
552 bld
.getSSA(), i
->getSrc(0));
554 i
->setSrc(1, rsq
->getDef(0));
// Lowers a power operation. Visible steps, all on one scratch value:
//   val = lg2(src0); val = src1 * val (with the `dnz` flag set on the
//   multiply); val = preex2(val).
// NOTE(review): the final rewrite of `i` is not visible in this listing.
560 NVC0LoweringPass::handlePOW(Instruction
*i
)
562 LValue
*val
= bld
.getScratch();
564 bld
.mkOp1(OP_LG2
, TYPE_F32
, val
, i
->getSrc(0));
565 bld
.mkOp2(OP_MUL
, TYPE_F32
, val
, i
->getSrc(1), val
)->dnz
= 1;
566 bld
.mkOp1(OP_PREEX2
, TYPE_F32
, val
, val
);
// Lowers an export. In fragment programs the export gets a GPR definition
// whose register id is the output offset divided by 4; source 1 is installed
// as source 0 and prog->maxGPR is raised to at least that id. In geometry
// programs gpEmitAddress is attached with setIndirect(0, 1, ...).
576 NVC0LoweringPass::handleEXPORT(Instruction
*i
)
578 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
579 int id
= i
->getSrc(0)->reg
.data
.offset
/ 4;
581 if (i
->src
[0].isIndirect(0)) // TODO, ugly
584 i
->src
[0].set(i
->src
[1]);
586 i
->setDef(0, new_LValue(func
, FILE_GPR
));
587 i
->getDef(0)->reg
.data
.id
= id
;
589 prog
->maxGPR
= MAX2(prog
->maxGPR
, id
);
591 if (prog
->getType() == Program::TYPE_GEOMETRY
) {
592 i
->setIndirect(0, 1, gpEmitAddress
);
// Lowers geometry output instructions. An OP_RESTART directly preceded by an
// OP_EMIT is folded into that OP_EMIT by setting its subOp to
// NV50_IR_SUBOP_EMIT_RESTART, and the OP_RESTART is deleted. The remaining
// visible statements require gpEmitAddress to be set, make it definition 0
// of `i`, move the old source 0 to slot 1 and install gpEmitAddress as
// source 0.
// NOTE(review): the branch structure between these steps is not visible in
// this listing.
598 NVC0LoweringPass::handleOUT(Instruction
*i
)
600 if (i
->op
== OP_RESTART
&& i
->prev
&& i
->prev
->op
== OP_EMIT
) {
601 i
->prev
->subOp
= NV50_IR_SUBOP_EMIT_RESTART
;
602 delete_Instruction(prog
, i
);
604 assert(gpEmitAddress
);
605 i
->setDef(0, gpEmitAddress
);
607 i
->setSrc(1, i
->getSrc(0));
608 i
->setSrc(0, gpEmitAddress
);
613 // Generate a binary predicate if an instruction is predicated by
614 // e.g. an f32 value.
// The guard tests for a missing predicate or one already in FILE_PREDICATE.
// Otherwise a new FILE_PREDICATE value is defined by an OP_SET comparison
// (CC_NEU, TYPE_U32) of immediate 0 against the old predicate, and it is
// installed as the predicate of `insn` with the condition code unchanged.
616 NVC0LoweringPass::checkPredicate(Instruction
*insn
)
618 Value
*pred
= insn
->getPredicate();
621 if (!pred
|| pred
->reg
.file
== FILE_PREDICATE
)
623 pdst
= new_LValue(func
, FILE_PREDICATE
);
625 // CAUTION: don't use pdst->getInsn, the definition might not be unique,
626 // delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
628 bld
.mkCmp(OP_SET
, CC_NEU
, TYPE_U32
, pdst
, bld
.mkImm(0), pred
);
630 insn
->setPredicate(insn
->cc
, pdst
);
634 // - add quadop dance for texturing
635 // - put FP outputs in GPRs
636 // - convert instruction sequences
// Per-instruction hook: positions the builder relative to `i`, then
// dispatches to the matching handler.
// NOTE(review): the conditions choosing among the three setPosition calls and
// the case labels of the dispatch are not visible in this listing.
639 NVC0LoweringPass::visit(Instruction
*i
)
642 bld
.setPosition(i
->prev
, true);
645 bld
.setPosition(i
->next
, false);
647 bld
.setPosition(i
->bb
, true);
649 if (i
->cc
!= CC_ALWAYS
)
659 return handleTEX(i
->asTex());
661 return handleTXD(i
->asTex());
// Emit OP_PREEX2 from source 0 into the instruction's own definition and feed
// that definition back as source 0.
663 bld
.mkOp1(OP_PREEX2
, TYPE_F32
, i
->getDef(0), i
->getSrc(0));
664 i
->setSrc(0, i
->getDef(0));
673 return handleSQRT(i
);
675 return handleEXPORT(i
);
680 return handleRDSV(i
);
682 return handleWRSV(i
);
684 if (i
->src
[0].getFile() == FILE_SHADER_INPUT
) {
686 assert(prog
->getType() != Program::TYPE_FRAGMENT
);
// Inputs at offsets in [0x280, 0x2c0) get NV50_IR_INTERP_SC added to their
// sample mode.
690 if (i
->getSrc(0)->reg
.data
.offset
>= 0x280 &&
691 i
->getSrc(0)->reg
.data
.offset
< 0x2c0)
692 i
->setInterpolate(i
->getSampleMode() | NV50_IR_INTERP_SC
);
// Input at offset 0x3fc: after `i`, compute face = (face << 31) ^ 0xbf800000.
// 0xbf800000 is the bit pattern of -1.0f, so the result is +1.0f when bit 0
// of the loaded value is set and -1.0f otherwise.
695 if (i
->getSrc(0)->reg
.data
.offset
== 0x3fc) {
696 Value
*face
= i
->getDef(0);
697 bld
.setPosition(i
, true);
698 bld
.mkOp2(OP_SHL
, TYPE_U32
, face
, face
, bld
.mkImm(31));
699 bld
.mkOp2(OP_XOR
, TYPE_U32
, face
, face
, bld
.mkImm(0xbf800000));
// Runs the pass belonging to the given code generation stage on `prog`:
//   CG_STAGE_PRE_SSA -> NVC0LoweringPass
//   CG_STAGE_POST_RA -> NVC0LegalizePostRA
//   CG_STAGE_SSA     -> NVC0LegalizeSSA
// Each pass is invoked as run(prog, false, true) and its result is returned.
// NOTE(review): the return value for any other stage is not visible in this
// listing.
709 TargetNVC0::runLegalizePass(Program
*prog
, CGStage stage
) const
711 if (stage
== CG_STAGE_PRE_SSA
) {
712 NVC0LoweringPass
pass(prog
);
713 return pass
.run(prog
, false, true);
715 if (stage
== CG_STAGE_POST_RA
) {
716 NVC0LegalizePostRA pass
;
717 return pass
.run(prog
, false, true);
719 if (stage
== CG_STAGE_SSA
) {
720 NVC0LegalizeSSA pass
;
721 return pass
.run(prog
, false, true);
726 } // namespace nv50_ir